memory: add error propagation to file-based RAM allocation
[qemu.git] / exec.c
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #include "hw/qdev.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
54
55 #include "qemu/range.h"
56
57 //#define DEBUG_SUBPAGE
58
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
61
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
66
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
69
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
72
73 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
74 #define RAM_PREALLOC (1 << 0)
75
76 #endif
77
78 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
79 /* current CPU in the current thread. It is only valid inside
80 cpu_exec() */
81 DEFINE_TLS(CPUState *, current_cpu);
82 /* 0 = Do not count executed instructions.
83 1 = Precise instruction counting.
84 2 = Adaptive rate instruction counting. */
85 int use_icount;
86
87 #if !defined(CONFIG_USER_ONLY)
88
/*
 * One slot of the physical page map.  The two bit-fields together occupy
 * 32 bits; 'skip' decides how 'ptr' has to be interpreted.
 */
typedef struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
} PhysPageEntry;
97
98 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
99
100 /* Size of the L2 (and L3, etc) page tables. */
101 #define ADDR_SPACE_BITS 64
102
103 #define P_L2_BITS 9
104 #define P_L2_SIZE (1 << P_L2_BITS)
105
106 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
107
108 typedef PhysPageEntry Node[P_L2_SIZE];
109
110 typedef struct PhysPageMap {
111 unsigned sections_nb;
112 unsigned sections_nb_alloc;
113 unsigned nodes_nb;
114 unsigned nodes_nb_alloc;
115 Node *nodes;
116 MemoryRegionSection *sections;
117 } PhysPageMap;
118
119 struct AddressSpaceDispatch {
120 /* This is a multi-level map on the physical address space.
121 * The bottom level has pointers to MemoryRegionSections.
122 */
123 PhysPageEntry phys_map;
124 PhysPageMap map;
125 AddressSpace *as;
126 };
127
128 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
129 typedef struct subpage_t {
130 MemoryRegion iomem;
131 AddressSpace *as;
132 hwaddr base;
133 uint16_t sub_section[TARGET_PAGE_SIZE];
134 } subpage_t;
135
136 #define PHYS_SECTION_UNASSIGNED 0
137 #define PHYS_SECTION_NOTDIRTY 1
138 #define PHYS_SECTION_ROM 2
139 #define PHYS_SECTION_WATCH 3
140
141 static void io_mem_init(void);
142 static void memory_map_init(void);
143 static void tcg_commit(MemoryListener *listener);
144
145 static MemoryRegion io_mem_watch;
146 #endif
147
148 #if !defined(CONFIG_USER_ONLY)
149
150 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
151 {
152 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
153 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
154 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
155 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 }
157 }
158
159 static uint32_t phys_map_node_alloc(PhysPageMap *map)
160 {
161 unsigned i;
162 uint32_t ret;
163
164 ret = map->nodes_nb++;
165 assert(ret != PHYS_MAP_NODE_NIL);
166 assert(ret != map->nodes_nb_alloc);
167 for (i = 0; i < P_L2_SIZE; ++i) {
168 map->nodes[ret][i].skip = 1;
169 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
170 }
171 return ret;
172 }
173
174 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
175 hwaddr *index, hwaddr *nb, uint16_t leaf,
176 int level)
177 {
178 PhysPageEntry *p;
179 int i;
180 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
181
182 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
183 lp->ptr = phys_map_node_alloc(map);
184 p = map->nodes[lp->ptr];
185 if (level == 0) {
186 for (i = 0; i < P_L2_SIZE; i++) {
187 p[i].skip = 0;
188 p[i].ptr = PHYS_SECTION_UNASSIGNED;
189 }
190 }
191 } else {
192 p = map->nodes[lp->ptr];
193 }
194 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
195
196 while (*nb && lp < &p[P_L2_SIZE]) {
197 if ((*index & (step - 1)) == 0 && *nb >= step) {
198 lp->skip = 0;
199 lp->ptr = leaf;
200 *index += step;
201 *nb -= step;
202 } else {
203 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
204 }
205 ++lp;
206 }
207 }
208
209 static void phys_page_set(AddressSpaceDispatch *d,
210 hwaddr index, hwaddr nb,
211 uint16_t leaf)
212 {
213 /* Wildly overreserve - it doesn't matter much. */
214 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
215
216 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
217 }
218
219 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
220 * and update our entry so we can skip it and go directly to the destination.
221 */
222 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
223 {
224 unsigned valid_ptr = P_L2_SIZE;
225 int valid = 0;
226 PhysPageEntry *p;
227 int i;
228
229 if (lp->ptr == PHYS_MAP_NODE_NIL) {
230 return;
231 }
232
233 p = nodes[lp->ptr];
234 for (i = 0; i < P_L2_SIZE; i++) {
235 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
236 continue;
237 }
238
239 valid_ptr = i;
240 valid++;
241 if (p[i].skip) {
242 phys_page_compact(&p[i], nodes, compacted);
243 }
244 }
245
246 /* We can only compress if there's only one child. */
247 if (valid != 1) {
248 return;
249 }
250
251 assert(valid_ptr < P_L2_SIZE);
252
253 /* Don't compress if it won't fit in the # of bits we have. */
254 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
255 return;
256 }
257
258 lp->ptr = p[valid_ptr].ptr;
259 if (!p[valid_ptr].skip) {
260 /* If our only child is a leaf, make this a leaf. */
261 /* By design, we should have made this node a leaf to begin with so we
262 * should never reach here.
263 * But since it's so simple to handle this, let's do it just in case we
264 * change this rule.
265 */
266 lp->skip = 0;
267 } else {
268 lp->skip += p[valid_ptr].skip;
269 }
270 }
271
272 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
273 {
274 DECLARE_BITMAP(compacted, nodes_nb);
275
276 if (d->phys_map.skip) {
277 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 }
279 }
280
281 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
282 Node *nodes, MemoryRegionSection *sections)
283 {
284 PhysPageEntry *p;
285 hwaddr index = addr >> TARGET_PAGE_BITS;
286 int i;
287
288 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
289 if (lp.ptr == PHYS_MAP_NODE_NIL) {
290 return &sections[PHYS_SECTION_UNASSIGNED];
291 }
292 p = nodes[lp.ptr];
293 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
294 }
295
296 if (sections[lp.ptr].size.hi ||
297 range_covers_byte(sections[lp.ptr].offset_within_address_space,
298 sections[lp.ptr].size.lo, addr)) {
299 return &sections[lp.ptr];
300 } else {
301 return &sections[PHYS_SECTION_UNASSIGNED];
302 }
303 }
304
305 bool memory_region_is_unassigned(MemoryRegion *mr)
306 {
307 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
308 && mr != &io_mem_watch;
309 }
310
311 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
312 hwaddr addr,
313 bool resolve_subpage)
314 {
315 MemoryRegionSection *section;
316 subpage_t *subpage;
317
318 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
319 if (resolve_subpage && section->mr->subpage) {
320 subpage = container_of(section->mr, subpage_t, iomem);
321 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
322 }
323 return section;
324 }
325
326 static MemoryRegionSection *
327 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
328 hwaddr *plen, bool resolve_subpage)
329 {
330 MemoryRegionSection *section;
331 Int128 diff;
332
333 section = address_space_lookup_region(d, addr, resolve_subpage);
334 /* Compute offset within MemoryRegionSection */
335 addr -= section->offset_within_address_space;
336
337 /* Compute offset within MemoryRegion */
338 *xlat = addr + section->offset_within_region;
339
340 diff = int128_sub(section->mr->size, int128_make64(addr));
341 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
342 return section;
343 }
344
345 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
346 {
347 if (memory_region_is_ram(mr)) {
348 return !(is_write && mr->readonly);
349 }
350 if (memory_region_is_romd(mr)) {
351 return !is_write;
352 }
353
354 return false;
355 }
356
357 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
358 hwaddr *xlat, hwaddr *plen,
359 bool is_write)
360 {
361 IOMMUTLBEntry iotlb;
362 MemoryRegionSection *section;
363 MemoryRegion *mr;
364 hwaddr len = *plen;
365
366 for (;;) {
367 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
368 mr = section->mr;
369
370 if (!mr->iommu_ops) {
371 break;
372 }
373
374 iotlb = mr->iommu_ops->translate(mr, addr);
375 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
376 | (addr & iotlb.addr_mask));
377 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
378 if (!(iotlb.perm & (1 << is_write))) {
379 mr = &io_mem_unassigned;
380 break;
381 }
382
383 as = iotlb.target_as;
384 }
385
386 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
387 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
388 len = MIN(page, len);
389 }
390
391 *plen = len;
392 *xlat = addr;
393 return mr;
394 }
395
396 MemoryRegionSection *
397 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
398 hwaddr *plen)
399 {
400 MemoryRegionSection *section;
401 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
402
403 assert(!section->mr->iommu_ops);
404 return section;
405 }
406 #endif
407
408 void cpu_exec_init_all(void)
409 {
410 #if !defined(CONFIG_USER_ONLY)
411 qemu_mutex_init(&ram_list.mutex);
412 memory_map_init();
413 io_mem_init();
414 #endif
415 }
416
417 #if !defined(CONFIG_USER_ONLY)
418
419 static int cpu_common_post_load(void *opaque, int version_id)
420 {
421 CPUState *cpu = opaque;
422
423 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
424 version_id is increased. */
425 cpu->interrupt_request &= ~0x01;
426 tlb_flush(cpu, 1);
427
428 return 0;
429 }
430
431 const VMStateDescription vmstate_cpu_common = {
432 .name = "cpu_common",
433 .version_id = 1,
434 .minimum_version_id = 1,
435 .post_load = cpu_common_post_load,
436 .fields = (VMStateField[]) {
437 VMSTATE_UINT32(halted, CPUState),
438 VMSTATE_UINT32(interrupt_request, CPUState),
439 VMSTATE_END_OF_LIST()
440 }
441 };
442
443 #endif
444
445 CPUState *qemu_get_cpu(int index)
446 {
447 CPUState *cpu;
448
449 CPU_FOREACH(cpu) {
450 if (cpu->cpu_index == index) {
451 return cpu;
452 }
453 }
454
455 return NULL;
456 }
457
458 #if !defined(CONFIG_USER_ONLY)
459 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
460 {
461 /* We only support one address space per cpu at the moment. */
462 assert(cpu->as == as);
463
464 if (cpu->tcg_as_listener) {
465 memory_listener_unregister(cpu->tcg_as_listener);
466 } else {
467 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
468 }
469 cpu->tcg_as_listener->commit = tcg_commit;
470 memory_listener_register(cpu->tcg_as_listener, as);
471 }
472 #endif
473
474 void cpu_exec_init(CPUArchState *env)
475 {
476 CPUState *cpu = ENV_GET_CPU(env);
477 CPUClass *cc = CPU_GET_CLASS(cpu);
478 CPUState *some_cpu;
479 int cpu_index;
480
481 #if defined(CONFIG_USER_ONLY)
482 cpu_list_lock();
483 #endif
484 cpu_index = 0;
485 CPU_FOREACH(some_cpu) {
486 cpu_index++;
487 }
488 cpu->cpu_index = cpu_index;
489 cpu->numa_node = 0;
490 QTAILQ_INIT(&cpu->breakpoints);
491 QTAILQ_INIT(&cpu->watchpoints);
492 #ifndef CONFIG_USER_ONLY
493 cpu->as = &address_space_memory;
494 cpu->thread_id = qemu_get_thread_id();
495 #endif
496 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
497 #if defined(CONFIG_USER_ONLY)
498 cpu_list_unlock();
499 #endif
500 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
501 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
502 }
503 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
504 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
505 cpu_save, cpu_load, env);
506 assert(cc->vmsd == NULL);
507 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
508 #endif
509 if (cc->vmsd != NULL) {
510 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
511 }
512 }
513
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering the single byte at @pc. */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Invalidate the translated code at @pc.  Nothing is done when
 * cpu_get_phys_page_debug() reports -1 for the address.
 */
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys_page = cpu_get_phys_page_debug(cpu, pc);

    if (phys_page == -1) {
        return;
    }
    tb_invalidate_phys_addr(cpu->as,
                            phys_page | (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
531
532 #if defined(CONFIG_USER_ONLY)
533 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
534
535 {
536 }
537
538 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
539 int flags, CPUWatchpoint **watchpoint)
540 {
541 return -ENOSYS;
542 }
543 #else
544 /* Add a watchpoint. */
545 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
546 int flags, CPUWatchpoint **watchpoint)
547 {
548 vaddr len_mask = ~(len - 1);
549 CPUWatchpoint *wp;
550
551 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
552 if ((len & (len - 1)) || (addr & ~len_mask) ||
553 len == 0 || len > TARGET_PAGE_SIZE) {
554 error_report("tried to set invalid watchpoint at %"
555 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
556 return -EINVAL;
557 }
558 wp = g_malloc(sizeof(*wp));
559
560 wp->vaddr = addr;
561 wp->len_mask = len_mask;
562 wp->flags = flags;
563
564 /* keep all GDB-injected watchpoints in front */
565 if (flags & BP_GDB) {
566 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
567 } else {
568 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
569 }
570
571 tlb_flush_page(cpu, addr);
572
573 if (watchpoint)
574 *watchpoint = wp;
575 return 0;
576 }
577
578 /* Remove a specific watchpoint. */
579 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
580 int flags)
581 {
582 vaddr len_mask = ~(len - 1);
583 CPUWatchpoint *wp;
584
585 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
586 if (addr == wp->vaddr && len_mask == wp->len_mask
587 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
588 cpu_watchpoint_remove_by_ref(cpu, wp);
589 return 0;
590 }
591 }
592 return -ENOENT;
593 }
594
595 /* Remove a specific watchpoint by reference. */
596 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
597 {
598 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
599
600 tlb_flush_page(cpu, watchpoint->vaddr);
601
602 g_free(watchpoint);
603 }
604
605 /* Remove all matching watchpoints. */
606 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
607 {
608 CPUWatchpoint *wp, *next;
609
610 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
611 if (wp->flags & mask) {
612 cpu_watchpoint_remove_by_ref(cpu, wp);
613 }
614 }
615 }
616 #endif
617
618 /* Add a breakpoint. */
619 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
620 CPUBreakpoint **breakpoint)
621 {
622 #if defined(TARGET_HAS_ICE)
623 CPUBreakpoint *bp;
624
625 bp = g_malloc(sizeof(*bp));
626
627 bp->pc = pc;
628 bp->flags = flags;
629
630 /* keep all GDB-injected breakpoints in front */
631 if (flags & BP_GDB) {
632 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
633 } else {
634 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
635 }
636
637 breakpoint_invalidate(cpu, pc);
638
639 if (breakpoint) {
640 *breakpoint = bp;
641 }
642 return 0;
643 #else
644 return -ENOSYS;
645 #endif
646 }
647
648 /* Remove a specific breakpoint. */
649 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
650 {
651 #if defined(TARGET_HAS_ICE)
652 CPUBreakpoint *bp;
653
654 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
655 if (bp->pc == pc && bp->flags == flags) {
656 cpu_breakpoint_remove_by_ref(cpu, bp);
657 return 0;
658 }
659 }
660 return -ENOENT;
661 #else
662 return -ENOSYS;
663 #endif
664 }
665
666 /* Remove a specific breakpoint by reference. */
667 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
668 {
669 #if defined(TARGET_HAS_ICE)
670 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
671
672 breakpoint_invalidate(cpu, breakpoint->pc);
673
674 g_free(breakpoint);
675 #endif
676 }
677
678 /* Remove all matching breakpoints. */
679 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
680 {
681 #if defined(TARGET_HAS_ICE)
682 CPUBreakpoint *bp, *next;
683
684 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
685 if (bp->flags & mask) {
686 cpu_breakpoint_remove_by_ref(cpu, bp);
687 }
688 }
689 #endif
690 }
691
692 /* enable or disable single step mode. EXCP_DEBUG is returned by the
693 CPU loop after each instruction */
694 void cpu_single_step(CPUState *cpu, int enabled)
695 {
696 #if defined(TARGET_HAS_ICE)
697 if (cpu->singlestep_enabled != enabled) {
698 cpu->singlestep_enabled = enabled;
699 if (kvm_enabled()) {
700 kvm_update_guest_debug(cpu, 0);
701 } else {
702 /* must flush all the translated code to avoid inconsistencies */
703 /* XXX: only flush what is necessary */
704 CPUArchState *env = cpu->env_ptr;
705 tb_flush(env);
706 }
707 }
708 #endif
709 }
710
711 void cpu_abort(CPUState *cpu, const char *fmt, ...)
712 {
713 va_list ap;
714 va_list ap2;
715
716 va_start(ap, fmt);
717 va_copy(ap2, ap);
718 fprintf(stderr, "qemu: fatal: ");
719 vfprintf(stderr, fmt, ap);
720 fprintf(stderr, "\n");
721 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
722 if (qemu_log_enabled()) {
723 qemu_log("qemu: fatal: ");
724 qemu_log_vprintf(fmt, ap2);
725 qemu_log("\n");
726 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
727 qemu_log_flush();
728 qemu_log_close();
729 }
730 va_end(ap2);
731 va_end(ap);
732 #if defined(CONFIG_USER_ONLY)
733 {
734 struct sigaction act;
735 sigfillset(&act.sa_mask);
736 act.sa_handler = SIG_DFL;
737 sigaction(SIGABRT, &act, NULL);
738 }
739 #endif
740 abort();
741 }
742
743 #if !defined(CONFIG_USER_ONLY)
744 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
745 {
746 RAMBlock *block;
747
748 /* The list is protected by the iothread lock here. */
749 block = ram_list.mru_block;
750 if (block && addr - block->offset < block->length) {
751 goto found;
752 }
753 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
754 if (addr - block->offset < block->length) {
755 goto found;
756 }
757 }
758
759 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
760 abort();
761
762 found:
763 ram_list.mru_block = block;
764 return block;
765 }
766
767 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
768 {
769 ram_addr_t start1;
770 RAMBlock *block;
771 ram_addr_t end;
772
773 end = TARGET_PAGE_ALIGN(start + length);
774 start &= TARGET_PAGE_MASK;
775
776 block = qemu_get_ram_block(start);
777 assert(block == qemu_get_ram_block(end - 1));
778 start1 = (uintptr_t)block->host + (start - block->offset);
779 cpu_tlb_reset_dirty_all(start1, length);
780 }
781
782 /* Note: start and end must be within the same ram block. */
783 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
784 unsigned client)
785 {
786 if (length == 0)
787 return;
788 cpu_physical_memory_clear_dirty_range(start, length, client);
789
790 if (tcg_enabled()) {
791 tlb_reset_dirty_range_all(start, length);
792 }
793 }
794
795 static void cpu_physical_memory_set_dirty_tracking(bool enable)
796 {
797 in_migration = enable;
798 }
799
800 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
801 MemoryRegionSection *section,
802 target_ulong vaddr,
803 hwaddr paddr, hwaddr xlat,
804 int prot,
805 target_ulong *address)
806 {
807 hwaddr iotlb;
808 CPUWatchpoint *wp;
809
810 if (memory_region_is_ram(section->mr)) {
811 /* Normal RAM. */
812 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
813 + xlat;
814 if (!section->readonly) {
815 iotlb |= PHYS_SECTION_NOTDIRTY;
816 } else {
817 iotlb |= PHYS_SECTION_ROM;
818 }
819 } else {
820 iotlb = section - section->address_space->dispatch->map.sections;
821 iotlb += xlat;
822 }
823
824 /* Make accesses to pages with watchpoints go via the
825 watchpoint trap routines. */
826 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
827 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
828 /* Avoid trapping reads of pages with a write breakpoint. */
829 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
830 iotlb = PHYS_SECTION_WATCH + paddr;
831 *address |= TLB_MMIO;
832 break;
833 }
834 }
835 }
836
837 return iotlb;
838 }
839 #endif /* defined(CONFIG_USER_ONLY) */
840
841 #if !defined(CONFIG_USER_ONLY)
842
843 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
844 uint16_t section);
845 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
846
847 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
848
849 /*
850 * Set a custom physical guest memory alloator.
851 * Accelerators with unusual needs may need this. Hopefully, we can
852 * get rid of it eventually.
853 */
854 void phys_mem_set_alloc(void *(*alloc)(size_t))
855 {
856 phys_mem_alloc = alloc;
857 }
858
859 static uint16_t phys_section_add(PhysPageMap *map,
860 MemoryRegionSection *section)
861 {
862 /* The physical section number is ORed with a page-aligned
863 * pointer to produce the iotlb entries. Thus it should
864 * never overflow into the page-aligned value.
865 */
866 assert(map->sections_nb < TARGET_PAGE_SIZE);
867
868 if (map->sections_nb == map->sections_nb_alloc) {
869 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
870 map->sections = g_renew(MemoryRegionSection, map->sections,
871 map->sections_nb_alloc);
872 }
873 map->sections[map->sections_nb] = *section;
874 memory_region_ref(section->mr);
875 return map->sections_nb++;
876 }
877
878 static void phys_section_destroy(MemoryRegion *mr)
879 {
880 memory_region_unref(mr);
881
882 if (mr->subpage) {
883 subpage_t *subpage = container_of(mr, subpage_t, iomem);
884 memory_region_destroy(&subpage->iomem);
885 g_free(subpage);
886 }
887 }
888
889 static void phys_sections_free(PhysPageMap *map)
890 {
891 while (map->sections_nb > 0) {
892 MemoryRegionSection *section = &map->sections[--map->sections_nb];
893 phys_section_destroy(section->mr);
894 }
895 g_free(map->sections);
896 g_free(map->nodes);
897 }
898
899 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
900 {
901 subpage_t *subpage;
902 hwaddr base = section->offset_within_address_space
903 & TARGET_PAGE_MASK;
904 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
905 d->map.nodes, d->map.sections);
906 MemoryRegionSection subsection = {
907 .offset_within_address_space = base,
908 .size = int128_make64(TARGET_PAGE_SIZE),
909 };
910 hwaddr start, end;
911
912 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
913
914 if (!(existing->mr->subpage)) {
915 subpage = subpage_init(d->as, base);
916 subsection.address_space = d->as;
917 subsection.mr = &subpage->iomem;
918 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
919 phys_section_add(&d->map, &subsection));
920 } else {
921 subpage = container_of(existing->mr, subpage_t, iomem);
922 }
923 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
924 end = start + int128_get64(section->size) - 1;
925 subpage_register(subpage, start, end,
926 phys_section_add(&d->map, section));
927 }
928
929
930 static void register_multipage(AddressSpaceDispatch *d,
931 MemoryRegionSection *section)
932 {
933 hwaddr start_addr = section->offset_within_address_space;
934 uint16_t section_index = phys_section_add(&d->map, section);
935 uint64_t num_pages = int128_get64(int128_rshift(section->size,
936 TARGET_PAGE_BITS));
937
938 assert(num_pages);
939 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
940 }
941
942 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
943 {
944 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
945 AddressSpaceDispatch *d = as->next_dispatch;
946 MemoryRegionSection now = *section, remain = *section;
947 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
948
949 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
950 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
951 - now.offset_within_address_space;
952
953 now.size = int128_min(int128_make64(left), now.size);
954 register_subpage(d, &now);
955 } else {
956 now.size = int128_zero();
957 }
958 while (int128_ne(remain.size, now.size)) {
959 remain.size = int128_sub(remain.size, now.size);
960 remain.offset_within_address_space += int128_get64(now.size);
961 remain.offset_within_region += int128_get64(now.size);
962 now = remain;
963 if (int128_lt(remain.size, page_size)) {
964 register_subpage(d, &now);
965 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
966 now.size = page_size;
967 register_subpage(d, &now);
968 } else {
969 now.size = int128_and(now.size, int128_neg(page_size));
970 register_multipage(d, &now);
971 }
972 }
973 }
974
/* Flush the coalesced MMIO buffer; only KVM has one. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
980
981 void qemu_mutex_lock_ramlist(void)
982 {
983 qemu_mutex_lock(&ram_list.mutex);
984 }
985
986 void qemu_mutex_unlock_ramlist(void)
987 {
988 qemu_mutex_unlock(&ram_list.mutex);
989 }
990
991 #ifdef __linux__
992
993 #include <sys/vfs.h>
994
995 #define HUGETLBFS_MAGIC 0x958458f6
996
997 static long gethugepagesize(const char *path)
998 {
999 struct statfs fs;
1000 int ret;
1001
1002 do {
1003 ret = statfs(path, &fs);
1004 } while (ret != 0 && errno == EINTR);
1005
1006 if (ret != 0) {
1007 perror(path);
1008 return 0;
1009 }
1010
1011 if (fs.f_type != HUGETLBFS_MAGIC)
1012 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1013
1014 return fs.f_bsize;
1015 }
1016
1017 static void *file_ram_alloc(RAMBlock *block,
1018 ram_addr_t memory,
1019 const char *path,
1020 Error **errp)
1021 {
1022 char *filename;
1023 char *sanitized_name;
1024 char *c;
1025 void *area;
1026 int fd;
1027 unsigned long hpagesize;
1028
1029 hpagesize = gethugepagesize(path);
1030 if (!hpagesize) {
1031 goto error;
1032 }
1033
1034 if (memory < hpagesize) {
1035 return NULL;
1036 }
1037
1038 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1039 error_setg(errp,
1040 "host lacks kvm mmu notifiers, -mem-path unsupported");
1041 goto error;
1042 }
1043
1044 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1045 sanitized_name = g_strdup(block->mr->name);
1046 for (c = sanitized_name; *c != '\0'; c++) {
1047 if (*c == '/')
1048 *c = '_';
1049 }
1050
1051 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1052 sanitized_name);
1053 g_free(sanitized_name);
1054
1055 fd = mkstemp(filename);
1056 if (fd < 0) {
1057 error_setg_errno(errp, errno,
1058 "unable to create backing store for hugepages");
1059 g_free(filename);
1060 goto error;
1061 }
1062 unlink(filename);
1063 g_free(filename);
1064
1065 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1066
1067 /*
1068 * ftruncate is not supported by hugetlbfs in older
1069 * hosts, so don't bother bailing out on errors.
1070 * If anything goes wrong with it under other filesystems,
1071 * mmap will fail.
1072 */
1073 if (ftruncate(fd, memory)) {
1074 perror("ftruncate");
1075 }
1076
1077 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1078 if (area == MAP_FAILED) {
1079 error_setg_errno(errp, errno,
1080 "unable to map backing store for hugepages");
1081 close(fd);
1082 goto error;
1083 }
1084
1085 if (mem_prealloc) {
1086 os_mem_prealloc(fd, area, memory);
1087 }
1088
1089 block->fd = fd;
1090 return area;
1091
1092 error:
1093 if (mem_prealloc) {
1094 exit(1);
1095 }
1096 return NULL;
1097 }
1098 #endif
1099
1100 static ram_addr_t find_ram_offset(ram_addr_t size)
1101 {
1102 RAMBlock *block, *next_block;
1103 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1104
1105 assert(size != 0); /* it would hand out same offset multiple times */
1106
1107 if (QTAILQ_EMPTY(&ram_list.blocks))
1108 return 0;
1109
1110 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1111 ram_addr_t end, next = RAM_ADDR_MAX;
1112
1113 end = block->offset + block->length;
1114
1115 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1116 if (next_block->offset >= end) {
1117 next = MIN(next, next_block->offset);
1118 }
1119 }
1120 if (next - end >= size && next - end < mingap) {
1121 offset = end;
1122 mingap = next - end;
1123 }
1124 }
1125
1126 if (offset == RAM_ADDR_MAX) {
1127 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1128 (uint64_t)size);
1129 abort();
1130 }
1131
1132 return offset;
1133 }
1134
1135 ram_addr_t last_ram_offset(void)
1136 {
1137 RAMBlock *block;
1138 ram_addr_t last = 0;
1139
1140 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1141 last = MAX(last, block->offset + block->length);
1142
1143 return last;
1144 }
1145
1146 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1147 {
1148 int ret;
1149
1150 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1151 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1152 "dump-guest-core", true)) {
1153 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1154 if (ret) {
1155 perror("qemu_madvise");
1156 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1157 "but dump_guest_core=off specified\n");
1158 }
1159 }
1160 }
1161
1162 static RAMBlock *find_ram_block(ram_addr_t addr)
1163 {
1164 RAMBlock *block;
1165
1166 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1167 if (block->offset == addr) {
1168 return block;
1169 }
1170 }
1171
1172 return NULL;
1173 }
1174
1175 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1176 {
1177 RAMBlock *new_block = find_ram_block(addr);
1178 RAMBlock *block;
1179
1180 assert(new_block);
1181 assert(!new_block->idstr[0]);
1182
1183 if (dev) {
1184 char *id = qdev_get_dev_path(dev);
1185 if (id) {
1186 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1187 g_free(id);
1188 }
1189 }
1190 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1191
1192 /* This assumes the iothread lock is taken here too. */
1193 qemu_mutex_lock_ramlist();
1194 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1195 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1196 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1197 new_block->idstr);
1198 abort();
1199 }
1200 }
1201 qemu_mutex_unlock_ramlist();
1202 }
1203
1204 void qemu_ram_unset_idstr(ram_addr_t addr)
1205 {
1206 RAMBlock *block = find_ram_block(addr);
1207
1208 if (block) {
1209 memset(block->idstr, 0, sizeof(block->idstr));
1210 }
1211 }
1212
1213 static int memory_try_enable_merging(void *addr, size_t len)
1214 {
1215 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1216 /* disabled by the user */
1217 return 0;
1218 }
1219
1220 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1221 }
1222
1223 static ram_addr_t ram_block_add(RAMBlock *new_block)
1224 {
1225 RAMBlock *block;
1226 ram_addr_t old_ram_size, new_ram_size;
1227
1228 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1229
1230 /* This assumes the iothread lock is taken here too. */
1231 qemu_mutex_lock_ramlist();
1232 new_block->offset = find_ram_offset(new_block->length);
1233
1234 if (!new_block->host) {
1235 if (xen_enabled()) {
1236 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1237 } else {
1238 new_block->host = phys_mem_alloc(new_block->length);
1239 if (!new_block->host) {
1240 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1241 new_block->mr->name, strerror(errno));
1242 exit(1);
1243 }
1244 memory_try_enable_merging(new_block->host, new_block->length);
1245 }
1246 }
1247
1248 /* Keep the list sorted from biggest to smallest block. */
1249 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1250 if (block->length < new_block->length) {
1251 break;
1252 }
1253 }
1254 if (block) {
1255 QTAILQ_INSERT_BEFORE(block, new_block, next);
1256 } else {
1257 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1258 }
1259 ram_list.mru_block = NULL;
1260
1261 ram_list.version++;
1262 qemu_mutex_unlock_ramlist();
1263
1264 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1265
1266 if (new_ram_size > old_ram_size) {
1267 int i;
1268 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1269 ram_list.dirty_memory[i] =
1270 bitmap_zero_extend(ram_list.dirty_memory[i],
1271 old_ram_size, new_ram_size);
1272 }
1273 }
1274 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1275
1276 qemu_ram_setup_dump(new_block->host, new_block->length);
1277 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1278 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1279
1280 if (kvm_enabled()) {
1281 kvm_setup_guest_memory(new_block->host, new_block->length);
1282 }
1283
1284 return new_block->offset;
1285 }
1286
1287 #ifdef __linux__
1288 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1289 const char *mem_path,
1290 Error **errp)
1291 {
1292 RAMBlock *new_block;
1293
1294 if (xen_enabled()) {
1295 error_setg(errp, "-mem-path not supported with Xen");
1296 return -1;
1297 }
1298
1299 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1300 /*
1301 * file_ram_alloc() needs to allocate just like
1302 * phys_mem_alloc, but we haven't bothered to provide
1303 * a hook there.
1304 */
1305 error_setg(errp,
1306 "-mem-path not supported with this accelerator");
1307 return -1;
1308 }
1309
1310 size = TARGET_PAGE_ALIGN(size);
1311 new_block = g_malloc0(sizeof(*new_block));
1312 new_block->mr = mr;
1313 new_block->length = size;
1314 new_block->host = file_ram_alloc(new_block, size,
1315 mem_path, errp);
1316 if (!new_block->host) {
1317 g_free(new_block);
1318 return -1;
1319 }
1320
1321 return ram_block_add(new_block);
1322 }
1323 #endif
1324
1325 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1326 MemoryRegion *mr)
1327 {
1328 RAMBlock *new_block;
1329
1330 size = TARGET_PAGE_ALIGN(size);
1331 new_block = g_malloc0(sizeof(*new_block));
1332 new_block->mr = mr;
1333 new_block->length = size;
1334 new_block->fd = -1;
1335 new_block->host = host;
1336 if (host) {
1337 new_block->flags |= RAM_PREALLOC;
1338 }
1339 return ram_block_add(new_block);
1340 }
1341
1342 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1343 {
1344 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1345 }
1346
1347 void qemu_ram_free_from_ptr(ram_addr_t addr)
1348 {
1349 RAMBlock *block;
1350
1351 /* This assumes the iothread lock is taken here too. */
1352 qemu_mutex_lock_ramlist();
1353 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1354 if (addr == block->offset) {
1355 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1356 ram_list.mru_block = NULL;
1357 ram_list.version++;
1358 g_free(block);
1359 break;
1360 }
1361 }
1362 qemu_mutex_unlock_ramlist();
1363 }
1364
1365 void qemu_ram_free(ram_addr_t addr)
1366 {
1367 RAMBlock *block;
1368
1369 /* This assumes the iothread lock is taken here too. */
1370 qemu_mutex_lock_ramlist();
1371 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1372 if (addr == block->offset) {
1373 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1374 ram_list.mru_block = NULL;
1375 ram_list.version++;
1376 if (block->flags & RAM_PREALLOC) {
1377 ;
1378 } else if (xen_enabled()) {
1379 xen_invalidate_map_cache_entry(block->host);
1380 #ifndef _WIN32
1381 } else if (block->fd >= 0) {
1382 munmap(block->host, block->length);
1383 close(block->fd);
1384 #endif
1385 } else {
1386 qemu_anon_ram_free(block->host, block->length);
1387 }
1388 g_free(block);
1389 break;
1390 }
1391 }
1392 qemu_mutex_unlock_ramlist();
1393
1394 }
1395
1396 #ifndef _WIN32
1397 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1398 {
1399 RAMBlock *block;
1400 ram_addr_t offset;
1401 int flags;
1402 void *area, *vaddr;
1403
1404 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1405 offset = addr - block->offset;
1406 if (offset < block->length) {
1407 vaddr = block->host + offset;
1408 if (block->flags & RAM_PREALLOC) {
1409 ;
1410 } else if (xen_enabled()) {
1411 abort();
1412 } else {
1413 flags = MAP_FIXED;
1414 munmap(vaddr, length);
1415 if (block->fd >= 0) {
1416 #ifdef MAP_POPULATE
1417 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1418 MAP_PRIVATE;
1419 #else
1420 flags |= MAP_PRIVATE;
1421 #endif
1422 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1423 flags, block->fd, offset);
1424 } else {
1425 /*
1426 * Remap needs to match alloc. Accelerators that
1427 * set phys_mem_alloc never remap. If they did,
1428 * we'd need a remap hook here.
1429 */
1430 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1431
1432 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1433 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1434 flags, -1, 0);
1435 }
1436 if (area != vaddr) {
1437 fprintf(stderr, "Could not remap addr: "
1438 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1439 length, addr);
1440 exit(1);
1441 }
1442 memory_try_enable_merging(vaddr, length);
1443 qemu_ram_setup_dump(vaddr, length);
1444 }
1445 return;
1446 }
1447 }
1448 }
1449 #endif /* !_WIN32 */
1450
1451 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1452 With the exception of the softmmu code in this file, this should
1453 only be used for local memory (e.g. video ram) that the device owns,
1454 and knows it isn't going to access beyond the end of the block.
1455
1456 It should not be used for general purpose DMA.
1457 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1458 */
1459 void *qemu_get_ram_ptr(ram_addr_t addr)
1460 {
1461 RAMBlock *block = qemu_get_ram_block(addr);
1462
1463 if (xen_enabled()) {
1464 /* We need to check if the requested address is in the RAM
1465 * because we don't want to map the entire memory in QEMU.
1466 * In that case just map until the end of the page.
1467 */
1468 if (block->offset == 0) {
1469 return xen_map_cache(addr, 0, 0);
1470 } else if (block->host == NULL) {
1471 block->host =
1472 xen_map_cache(block->offset, block->length, 1);
1473 }
1474 }
1475 return block->host + (addr - block->offset);
1476 }
1477
1478 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1479 * but takes a size argument */
1480 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1481 {
1482 if (*size == 0) {
1483 return NULL;
1484 }
1485 if (xen_enabled()) {
1486 return xen_map_cache(addr, *size, 1);
1487 } else {
1488 RAMBlock *block;
1489
1490 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1491 if (addr - block->offset < block->length) {
1492 if (addr - block->offset + *size > block->length)
1493 *size = block->length - addr + block->offset;
1494 return block->host + (addr - block->offset);
1495 }
1496 }
1497
1498 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1499 abort();
1500 }
1501 }
1502
1503 /* Some of the softmmu routines need to translate from a host pointer
1504 (typically a TLB entry) back to a ram offset. */
1505 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1506 {
1507 RAMBlock *block;
1508 uint8_t *host = ptr;
1509
1510 if (xen_enabled()) {
1511 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1512 return qemu_get_ram_block(*ram_addr)->mr;
1513 }
1514
1515 block = ram_list.mru_block;
1516 if (block && block->host && host - block->host < block->length) {
1517 goto found;
1518 }
1519
1520 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1521 /* This case append when the block is not mapped. */
1522 if (block->host == NULL) {
1523 continue;
1524 }
1525 if (host - block->host < block->length) {
1526 goto found;
1527 }
1528 }
1529
1530 return NULL;
1531
1532 found:
1533 *ram_addr = block->offset + (host - block->host);
1534 return block->mr;
1535 }
1536
1537 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1538 uint64_t val, unsigned size)
1539 {
1540 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1541 tb_invalidate_phys_page_fast(ram_addr, size);
1542 }
1543 switch (size) {
1544 case 1:
1545 stb_p(qemu_get_ram_ptr(ram_addr), val);
1546 break;
1547 case 2:
1548 stw_p(qemu_get_ram_ptr(ram_addr), val);
1549 break;
1550 case 4:
1551 stl_p(qemu_get_ram_ptr(ram_addr), val);
1552 break;
1553 default:
1554 abort();
1555 }
1556 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1557 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1558 /* we remove the notdirty callback only if the code has been
1559 flushed */
1560 if (!cpu_physical_memory_is_clean(ram_addr)) {
1561 CPUArchState *env = current_cpu->env_ptr;
1562 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1563 }
1564 }
1565
1566 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1567 unsigned size, bool is_write)
1568 {
1569 return is_write;
1570 }
1571
1572 static const MemoryRegionOps notdirty_mem_ops = {
1573 .write = notdirty_mem_write,
1574 .valid.accepts = notdirty_mem_accepts,
1575 .endianness = DEVICE_NATIVE_ENDIAN,
1576 };
1577
1578 /* Generate a debug exception if a watchpoint has been hit. */
1579 static void check_watchpoint(int offset, int len_mask, int flags)
1580 {
1581 CPUState *cpu = current_cpu;
1582 CPUArchState *env = cpu->env_ptr;
1583 target_ulong pc, cs_base;
1584 target_ulong vaddr;
1585 CPUWatchpoint *wp;
1586 int cpu_flags;
1587
1588 if (cpu->watchpoint_hit) {
1589 /* We re-entered the check after replacing the TB. Now raise
1590 * the debug interrupt so that is will trigger after the
1591 * current instruction. */
1592 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1593 return;
1594 }
1595 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1596 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1597 if ((vaddr == (wp->vaddr & len_mask) ||
1598 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1599 wp->flags |= BP_WATCHPOINT_HIT;
1600 if (!cpu->watchpoint_hit) {
1601 cpu->watchpoint_hit = wp;
1602 tb_check_watchpoint(cpu);
1603 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1604 cpu->exception_index = EXCP_DEBUG;
1605 cpu_loop_exit(cpu);
1606 } else {
1607 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1608 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1609 cpu_resume_from_signal(cpu, NULL);
1610 }
1611 }
1612 } else {
1613 wp->flags &= ~BP_WATCHPOINT_HIT;
1614 }
1615 }
1616 }
1617
1618 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1619 so these check for a hit then pass through to the normal out-of-line
1620 phys routines. */
1621 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1622 unsigned size)
1623 {
1624 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1625 switch (size) {
1626 case 1: return ldub_phys(&address_space_memory, addr);
1627 case 2: return lduw_phys(&address_space_memory, addr);
1628 case 4: return ldl_phys(&address_space_memory, addr);
1629 default: abort();
1630 }
1631 }
1632
1633 static void watch_mem_write(void *opaque, hwaddr addr,
1634 uint64_t val, unsigned size)
1635 {
1636 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1637 switch (size) {
1638 case 1:
1639 stb_phys(&address_space_memory, addr, val);
1640 break;
1641 case 2:
1642 stw_phys(&address_space_memory, addr, val);
1643 break;
1644 case 4:
1645 stl_phys(&address_space_memory, addr, val);
1646 break;
1647 default: abort();
1648 }
1649 }
1650
1651 static const MemoryRegionOps watch_mem_ops = {
1652 .read = watch_mem_read,
1653 .write = watch_mem_write,
1654 .endianness = DEVICE_NATIVE_ENDIAN,
1655 };
1656
1657 static uint64_t subpage_read(void *opaque, hwaddr addr,
1658 unsigned len)
1659 {
1660 subpage_t *subpage = opaque;
1661 uint8_t buf[4];
1662
1663 #if defined(DEBUG_SUBPAGE)
1664 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1665 subpage, len, addr);
1666 #endif
1667 address_space_read(subpage->as, addr + subpage->base, buf, len);
1668 switch (len) {
1669 case 1:
1670 return ldub_p(buf);
1671 case 2:
1672 return lduw_p(buf);
1673 case 4:
1674 return ldl_p(buf);
1675 default:
1676 abort();
1677 }
1678 }
1679
1680 static void subpage_write(void *opaque, hwaddr addr,
1681 uint64_t value, unsigned len)
1682 {
1683 subpage_t *subpage = opaque;
1684 uint8_t buf[4];
1685
1686 #if defined(DEBUG_SUBPAGE)
1687 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1688 " value %"PRIx64"\n",
1689 __func__, subpage, len, addr, value);
1690 #endif
1691 switch (len) {
1692 case 1:
1693 stb_p(buf, value);
1694 break;
1695 case 2:
1696 stw_p(buf, value);
1697 break;
1698 case 4:
1699 stl_p(buf, value);
1700 break;
1701 default:
1702 abort();
1703 }
1704 address_space_write(subpage->as, addr + subpage->base, buf, len);
1705 }
1706
1707 static bool subpage_accepts(void *opaque, hwaddr addr,
1708 unsigned len, bool is_write)
1709 {
1710 subpage_t *subpage = opaque;
1711 #if defined(DEBUG_SUBPAGE)
1712 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1713 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1714 #endif
1715
1716 return address_space_access_valid(subpage->as, addr + subpage->base,
1717 len, is_write);
1718 }
1719
1720 static const MemoryRegionOps subpage_ops = {
1721 .read = subpage_read,
1722 .write = subpage_write,
1723 .valid.accepts = subpage_accepts,
1724 .endianness = DEVICE_NATIVE_ENDIAN,
1725 };
1726
1727 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1728 uint16_t section)
1729 {
1730 int idx, eidx;
1731
1732 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1733 return -1;
1734 idx = SUBPAGE_IDX(start);
1735 eidx = SUBPAGE_IDX(end);
1736 #if defined(DEBUG_SUBPAGE)
1737 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1738 __func__, mmio, start, end, idx, eidx, section);
1739 #endif
1740 for (; idx <= eidx; idx++) {
1741 mmio->sub_section[idx] = section;
1742 }
1743
1744 return 0;
1745 }
1746
1747 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1748 {
1749 subpage_t *mmio;
1750
1751 mmio = g_malloc0(sizeof(subpage_t));
1752
1753 mmio->as = as;
1754 mmio->base = base;
1755 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1756 "subpage", TARGET_PAGE_SIZE);
1757 mmio->iomem.subpage = true;
1758 #if defined(DEBUG_SUBPAGE)
1759 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1760 mmio, base, TARGET_PAGE_SIZE);
1761 #endif
1762 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1763
1764 return mmio;
1765 }
1766
1767 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1768 MemoryRegion *mr)
1769 {
1770 assert(as);
1771 MemoryRegionSection section = {
1772 .address_space = as,
1773 .mr = mr,
1774 .offset_within_address_space = 0,
1775 .offset_within_region = 0,
1776 .size = int128_2_64(),
1777 };
1778
1779 return phys_section_add(map, &section);
1780 }
1781
1782 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1783 {
1784 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1785 }
1786
1787 static void io_mem_init(void)
1788 {
1789 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1790 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1791 "unassigned", UINT64_MAX);
1792 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1793 "notdirty", UINT64_MAX);
1794 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1795 "watch", UINT64_MAX);
1796 }
1797
1798 static void mem_begin(MemoryListener *listener)
1799 {
1800 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1801 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1802 uint16_t n;
1803
1804 n = dummy_section(&d->map, as, &io_mem_unassigned);
1805 assert(n == PHYS_SECTION_UNASSIGNED);
1806 n = dummy_section(&d->map, as, &io_mem_notdirty);
1807 assert(n == PHYS_SECTION_NOTDIRTY);
1808 n = dummy_section(&d->map, as, &io_mem_rom);
1809 assert(n == PHYS_SECTION_ROM);
1810 n = dummy_section(&d->map, as, &io_mem_watch);
1811 assert(n == PHYS_SECTION_WATCH);
1812
1813 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1814 d->as = as;
1815 as->next_dispatch = d;
1816 }
1817
1818 static void mem_commit(MemoryListener *listener)
1819 {
1820 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1821 AddressSpaceDispatch *cur = as->dispatch;
1822 AddressSpaceDispatch *next = as->next_dispatch;
1823
1824 phys_page_compact_all(next, next->map.nodes_nb);
1825
1826 as->dispatch = next;
1827
1828 if (cur) {
1829 phys_sections_free(&cur->map);
1830 g_free(cur);
1831 }
1832 }
1833
1834 static void tcg_commit(MemoryListener *listener)
1835 {
1836 CPUState *cpu;
1837
1838 /* since each CPU stores ram addresses in its TLB cache, we must
1839 reset the modified entries */
1840 /* XXX: slow ! */
1841 CPU_FOREACH(cpu) {
1842 /* FIXME: Disentangle the cpu.h circular files deps so we can
1843 directly get the right CPU from listener. */
1844 if (cpu->tcg_as_listener != listener) {
1845 continue;
1846 }
1847 tlb_flush(cpu, 1);
1848 }
1849 }
1850
1851 static void core_log_global_start(MemoryListener *listener)
1852 {
1853 cpu_physical_memory_set_dirty_tracking(true);
1854 }
1855
1856 static void core_log_global_stop(MemoryListener *listener)
1857 {
1858 cpu_physical_memory_set_dirty_tracking(false);
1859 }
1860
1861 static MemoryListener core_memory_listener = {
1862 .log_global_start = core_log_global_start,
1863 .log_global_stop = core_log_global_stop,
1864 .priority = 1,
1865 };
1866
1867 void address_space_init_dispatch(AddressSpace *as)
1868 {
1869 as->dispatch = NULL;
1870 as->dispatch_listener = (MemoryListener) {
1871 .begin = mem_begin,
1872 .commit = mem_commit,
1873 .region_add = mem_add,
1874 .region_nop = mem_add,
1875 .priority = 0,
1876 };
1877 memory_listener_register(&as->dispatch_listener, as);
1878 }
1879
1880 void address_space_destroy_dispatch(AddressSpace *as)
1881 {
1882 AddressSpaceDispatch *d = as->dispatch;
1883
1884 memory_listener_unregister(&as->dispatch_listener);
1885 g_free(d);
1886 as->dispatch = NULL;
1887 }
1888
1889 static void memory_map_init(void)
1890 {
1891 system_memory = g_malloc(sizeof(*system_memory));
1892
1893 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1894 address_space_init(&address_space_memory, system_memory, "memory");
1895
1896 system_io = g_malloc(sizeof(*system_io));
1897 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1898 65536);
1899 address_space_init(&address_space_io, system_io, "I/O");
1900
1901 memory_listener_register(&core_memory_listener, &address_space_memory);
1902 }
1903
1904 MemoryRegion *get_system_memory(void)
1905 {
1906 return system_memory;
1907 }
1908
1909 MemoryRegion *get_system_io(void)
1910 {
1911 return system_io;
1912 }
1913
1914 #endif /* !defined(CONFIG_USER_ONLY) */
1915
1916 /* physical memory access (slow version, mainly for debug) */
1917 #if defined(CONFIG_USER_ONLY)
1918 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1919 uint8_t *buf, int len, int is_write)
1920 {
1921 int l, flags;
1922 target_ulong page;
1923 void * p;
1924
1925 while (len > 0) {
1926 page = addr & TARGET_PAGE_MASK;
1927 l = (page + TARGET_PAGE_SIZE) - addr;
1928 if (l > len)
1929 l = len;
1930 flags = page_get_flags(page);
1931 if (!(flags & PAGE_VALID))
1932 return -1;
1933 if (is_write) {
1934 if (!(flags & PAGE_WRITE))
1935 return -1;
1936 /* XXX: this code should not depend on lock_user */
1937 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1938 return -1;
1939 memcpy(p, buf, l);
1940 unlock_user(p, addr, l);
1941 } else {
1942 if (!(flags & PAGE_READ))
1943 return -1;
1944 /* XXX: this code should not depend on lock_user */
1945 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1946 return -1;
1947 memcpy(buf, p, l);
1948 unlock_user(p, addr, 0);
1949 }
1950 len -= l;
1951 buf += l;
1952 addr += l;
1953 }
1954 return 0;
1955 }
1956
1957 #else
1958
1959 static void invalidate_and_set_dirty(hwaddr addr,
1960 hwaddr length)
1961 {
1962 if (cpu_physical_memory_is_clean(addr)) {
1963 /* invalidate code */
1964 tb_invalidate_phys_page_range(addr, addr + length, 0);
1965 /* set dirty bit */
1966 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1967 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1968 }
1969 xen_modified_memory(addr, length);
1970 }
1971
1972 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1973 {
1974 unsigned access_size_max = mr->ops->valid.max_access_size;
1975
1976 /* Regions are assumed to support 1-4 byte accesses unless
1977 otherwise specified. */
1978 if (access_size_max == 0) {
1979 access_size_max = 4;
1980 }
1981
1982 /* Bound the maximum access by the alignment of the address. */
1983 if (!mr->ops->impl.unaligned) {
1984 unsigned align_size_max = addr & -addr;
1985 if (align_size_max != 0 && align_size_max < access_size_max) {
1986 access_size_max = align_size_max;
1987 }
1988 }
1989
1990 /* Don't attempt accesses larger than the maximum. */
1991 if (l > access_size_max) {
1992 l = access_size_max;
1993 }
1994 if (l & (l - 1)) {
1995 l = 1 << (qemu_fls(l) - 1);
1996 }
1997
1998 return l;
1999 }
2000
2001 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2002 int len, bool is_write)
2003 {
2004 hwaddr l;
2005 uint8_t *ptr;
2006 uint64_t val;
2007 hwaddr addr1;
2008 MemoryRegion *mr;
2009 bool error = false;
2010
2011 while (len > 0) {
2012 l = len;
2013 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2014
2015 if (is_write) {
2016 if (!memory_access_is_direct(mr, is_write)) {
2017 l = memory_access_size(mr, l, addr1);
2018 /* XXX: could force current_cpu to NULL to avoid
2019 potential bugs */
2020 switch (l) {
2021 case 8:
2022 /* 64 bit write access */
2023 val = ldq_p(buf);
2024 error |= io_mem_write(mr, addr1, val, 8);
2025 break;
2026 case 4:
2027 /* 32 bit write access */
2028 val = ldl_p(buf);
2029 error |= io_mem_write(mr, addr1, val, 4);
2030 break;
2031 case 2:
2032 /* 16 bit write access */
2033 val = lduw_p(buf);
2034 error |= io_mem_write(mr, addr1, val, 2);
2035 break;
2036 case 1:
2037 /* 8 bit write access */
2038 val = ldub_p(buf);
2039 error |= io_mem_write(mr, addr1, val, 1);
2040 break;
2041 default:
2042 abort();
2043 }
2044 } else {
2045 addr1 += memory_region_get_ram_addr(mr);
2046 /* RAM case */
2047 ptr = qemu_get_ram_ptr(addr1);
2048 memcpy(ptr, buf, l);
2049 invalidate_and_set_dirty(addr1, l);
2050 }
2051 } else {
2052 if (!memory_access_is_direct(mr, is_write)) {
2053 /* I/O case */
2054 l = memory_access_size(mr, l, addr1);
2055 switch (l) {
2056 case 8:
2057 /* 64 bit read access */
2058 error |= io_mem_read(mr, addr1, &val, 8);
2059 stq_p(buf, val);
2060 break;
2061 case 4:
2062 /* 32 bit read access */
2063 error |= io_mem_read(mr, addr1, &val, 4);
2064 stl_p(buf, val);
2065 break;
2066 case 2:
2067 /* 16 bit read access */
2068 error |= io_mem_read(mr, addr1, &val, 2);
2069 stw_p(buf, val);
2070 break;
2071 case 1:
2072 /* 8 bit read access */
2073 error |= io_mem_read(mr, addr1, &val, 1);
2074 stb_p(buf, val);
2075 break;
2076 default:
2077 abort();
2078 }
2079 } else {
2080 /* RAM case */
2081 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2082 memcpy(buf, ptr, l);
2083 }
2084 }
2085 len -= l;
2086 buf += l;
2087 addr += l;
2088 }
2089
2090 return error;
2091 }
2092
2093 bool address_space_write(AddressSpace *as, hwaddr addr,
2094 const uint8_t *buf, int len)
2095 {
2096 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2097 }
2098
2099 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2100 {
2101 return address_space_rw(as, addr, buf, len, false);
2102 }
2103
2104
2105 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2106 int len, int is_write)
2107 {
2108 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2109 }
2110
/* Operation selector for cpu_physical_memory_write_rom_internal(). */
enum write_rom_type {
    WRITE_DATA,     /* copy the buffer into RAM/ROM */
    FLUSH_CACHE,    /* flush the host instruction cache for the range */
};
2115
2116 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2117 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2118 {
2119 hwaddr l;
2120 uint8_t *ptr;
2121 hwaddr addr1;
2122 MemoryRegion *mr;
2123
2124 while (len > 0) {
2125 l = len;
2126 mr = address_space_translate(as, addr, &addr1, &l, true);
2127
2128 if (!(memory_region_is_ram(mr) ||
2129 memory_region_is_romd(mr))) {
2130 /* do nothing */
2131 } else {
2132 addr1 += memory_region_get_ram_addr(mr);
2133 /* ROM/RAM case */
2134 ptr = qemu_get_ram_ptr(addr1);
2135 switch (type) {
2136 case WRITE_DATA:
2137 memcpy(ptr, buf, l);
2138 invalidate_and_set_dirty(addr1, l);
2139 break;
2140 case FLUSH_CACHE:
2141 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2142 break;
2143 }
2144 }
2145 len -= l;
2146 buf += l;
2147 addr += l;
2148 }
2149 }
2150
2151 /* used for ROM loading : can write in RAM and ROM */
2152 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2153 const uint8_t *buf, int len)
2154 {
2155 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2156 }
2157
2158 void cpu_flush_icache_range(hwaddr start, int len)
2159 {
2160 /*
2161 * This function should do the same thing as an icache flush that was
2162 * triggered from within the guest. For TCG we are always cache coherent,
2163 * so there is no need to flush anything. For KVM / Xen we need to flush
2164 * the host's instruction cache at least.
2165 */
2166 if (tcg_enabled()) {
2167 return;
2168 }
2169
2170 cpu_physical_memory_write_rom_internal(&address_space_memory,
2171 start, NULL, len, FLUSH_CACHE);
2172 }
2173
2174 typedef struct {
2175 MemoryRegion *mr;
2176 void *buffer;
2177 hwaddr addr;
2178 hwaddr len;
2179 } BounceBuffer;
2180
2181 static BounceBuffer bounce;
2182
2183 typedef struct MapClient {
2184 void *opaque;
2185 void (*callback)(void *opaque);
2186 QLIST_ENTRY(MapClient) link;
2187 } MapClient;
2188
2189 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2190 = QLIST_HEAD_INITIALIZER(map_client_list);
2191
2192 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2193 {
2194 MapClient *client = g_malloc(sizeof(*client));
2195
2196 client->opaque = opaque;
2197 client->callback = callback;
2198 QLIST_INSERT_HEAD(&map_client_list, client, link);
2199 return client;
2200 }
2201
2202 static void cpu_unregister_map_client(void *_client)
2203 {
2204 MapClient *client = (MapClient *)_client;
2205
2206 QLIST_REMOVE(client, link);
2207 g_free(client);
2208 }
2209
2210 static void cpu_notify_map_clients(void)
2211 {
2212 MapClient *client;
2213
2214 while (!QLIST_EMPTY(&map_client_list)) {
2215 client = QLIST_FIRST(&map_client_list);
2216 client->callback(client->opaque);
2217 cpu_unregister_map_client(client);
2218 }
2219 }
2220
2221 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2222 {
2223 MemoryRegion *mr;
2224 hwaddr l, xlat;
2225
2226 while (len > 0) {
2227 l = len;
2228 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2229 if (!memory_access_is_direct(mr, is_write)) {
2230 l = memory_access_size(mr, l, addr);
2231 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2232 return false;
2233 }
2234 }
2235
2236 len -= l;
2237 addr += l;
2238 }
2239 return true;
2240 }
2241
2242 /* Map a physical memory region into a host virtual address.
2243 * May map a subset of the requested range, given by and returned in *plen.
2244 * May return NULL if resources needed to perform the mapping are exhausted.
2245 * Use only for reads OR writes - not for read-modify-write operations.
2246 * Use cpu_register_map_client() to know when retrying the map operation is
2247 * likely to succeed.
2248 */
2249 void *address_space_map(AddressSpace *as,
2250 hwaddr addr,
2251 hwaddr *plen,
2252 bool is_write)
2253 {
2254 hwaddr len = *plen;
2255 hwaddr done = 0;
2256 hwaddr l, xlat, base;
2257 MemoryRegion *mr, *this_mr;
2258 ram_addr_t raddr;
2259
2260 if (len == 0) {
2261 return NULL;
2262 }
2263
2264 l = len;
2265 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2266 if (!memory_access_is_direct(mr, is_write)) {
2267 if (bounce.buffer) {
2268 return NULL;
2269 }
2270 /* Avoid unbounded allocations */
2271 l = MIN(l, TARGET_PAGE_SIZE);
2272 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2273 bounce.addr = addr;
2274 bounce.len = l;
2275
2276 memory_region_ref(mr);
2277 bounce.mr = mr;
2278 if (!is_write) {
2279 address_space_read(as, addr, bounce.buffer, l);
2280 }
2281
2282 *plen = l;
2283 return bounce.buffer;
2284 }
2285
2286 base = xlat;
2287 raddr = memory_region_get_ram_addr(mr);
2288
2289 for (;;) {
2290 len -= l;
2291 addr += l;
2292 done += l;
2293 if (len == 0) {
2294 break;
2295 }
2296
2297 l = len;
2298 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2299 if (this_mr != mr || xlat != base + done) {
2300 break;
2301 }
2302 }
2303
2304 memory_region_ref(mr);
2305 *plen = done;
2306 return qemu_ram_ptr_length(raddr + base, plen);
2307 }
2308
2309 /* Unmaps a memory region previously mapped by address_space_map().
2310 * Will also mark the memory as dirty if is_write == 1. access_len gives
2311 * the amount of memory that was actually read or written by the caller.
2312 */
2313 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2314 int is_write, hwaddr access_len)
2315 {
2316 if (buffer != bounce.buffer) {
2317 MemoryRegion *mr;
2318 ram_addr_t addr1;
2319
2320 mr = qemu_ram_addr_from_host(buffer, &addr1);
2321 assert(mr != NULL);
2322 if (is_write) {
2323 while (access_len) {
2324 unsigned l;
2325 l = TARGET_PAGE_SIZE;
2326 if (l > access_len)
2327 l = access_len;
2328 invalidate_and_set_dirty(addr1, l);
2329 addr1 += l;
2330 access_len -= l;
2331 }
2332 }
2333 if (xen_enabled()) {
2334 xen_invalidate_map_cache_entry(buffer);
2335 }
2336 memory_region_unref(mr);
2337 return;
2338 }
2339 if (is_write) {
2340 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2341 }
2342 qemu_vfree(bounce.buffer);
2343 bounce.buffer = NULL;
2344 memory_region_unref(bounce.mr);
2345 cpu_notify_map_clients();
2346 }
2347
2348 void *cpu_physical_memory_map(hwaddr addr,
2349 hwaddr *plen,
2350 int is_write)
2351 {
2352 return address_space_map(&address_space_memory, addr, plen, is_write);
2353 }
2354
2355 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2356 int is_write, hwaddr access_len)
2357 {
2358 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2359 }
2360
2361 /* warning: addr must be aligned */
2362 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2363 enum device_endian endian)
2364 {
2365 uint8_t *ptr;
2366 uint64_t val;
2367 MemoryRegion *mr;
2368 hwaddr l = 4;
2369 hwaddr addr1;
2370
2371 mr = address_space_translate(as, addr, &addr1, &l, false);
2372 if (l < 4 || !memory_access_is_direct(mr, false)) {
2373 /* I/O case */
2374 io_mem_read(mr, addr1, &val, 4);
2375 #if defined(TARGET_WORDS_BIGENDIAN)
2376 if (endian == DEVICE_LITTLE_ENDIAN) {
2377 val = bswap32(val);
2378 }
2379 #else
2380 if (endian == DEVICE_BIG_ENDIAN) {
2381 val = bswap32(val);
2382 }
2383 #endif
2384 } else {
2385 /* RAM case */
2386 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2387 & TARGET_PAGE_MASK)
2388 + addr1);
2389 switch (endian) {
2390 case DEVICE_LITTLE_ENDIAN:
2391 val = ldl_le_p(ptr);
2392 break;
2393 case DEVICE_BIG_ENDIAN:
2394 val = ldl_be_p(ptr);
2395 break;
2396 default:
2397 val = ldl_p(ptr);
2398 break;
2399 }
2400 }
2401 return val;
2402 }
2403
2404 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2405 {
2406 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2407 }
2408
2409 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2410 {
2411 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2412 }
2413
2414 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2415 {
2416 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2417 }
2418
2419 /* warning: addr must be aligned */
2420 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2421 enum device_endian endian)
2422 {
2423 uint8_t *ptr;
2424 uint64_t val;
2425 MemoryRegion *mr;
2426 hwaddr l = 8;
2427 hwaddr addr1;
2428
2429 mr = address_space_translate(as, addr, &addr1, &l,
2430 false);
2431 if (l < 8 || !memory_access_is_direct(mr, false)) {
2432 /* I/O case */
2433 io_mem_read(mr, addr1, &val, 8);
2434 #if defined(TARGET_WORDS_BIGENDIAN)
2435 if (endian == DEVICE_LITTLE_ENDIAN) {
2436 val = bswap64(val);
2437 }
2438 #else
2439 if (endian == DEVICE_BIG_ENDIAN) {
2440 val = bswap64(val);
2441 }
2442 #endif
2443 } else {
2444 /* RAM case */
2445 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2446 & TARGET_PAGE_MASK)
2447 + addr1);
2448 switch (endian) {
2449 case DEVICE_LITTLE_ENDIAN:
2450 val = ldq_le_p(ptr);
2451 break;
2452 case DEVICE_BIG_ENDIAN:
2453 val = ldq_be_p(ptr);
2454 break;
2455 default:
2456 val = ldq_p(ptr);
2457 break;
2458 }
2459 }
2460 return val;
2461 }
2462
2463 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2464 {
2465 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2466 }
2467
2468 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2469 {
2470 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2471 }
2472
2473 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2474 {
2475 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2476 }
2477
2478 /* XXX: optimize */
2479 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2480 {
2481 uint8_t val;
2482 address_space_rw(as, addr, &val, 1, 0);
2483 return val;
2484 }
2485
2486 /* warning: addr must be aligned */
2487 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2488 enum device_endian endian)
2489 {
2490 uint8_t *ptr;
2491 uint64_t val;
2492 MemoryRegion *mr;
2493 hwaddr l = 2;
2494 hwaddr addr1;
2495
2496 mr = address_space_translate(as, addr, &addr1, &l,
2497 false);
2498 if (l < 2 || !memory_access_is_direct(mr, false)) {
2499 /* I/O case */
2500 io_mem_read(mr, addr1, &val, 2);
2501 #if defined(TARGET_WORDS_BIGENDIAN)
2502 if (endian == DEVICE_LITTLE_ENDIAN) {
2503 val = bswap16(val);
2504 }
2505 #else
2506 if (endian == DEVICE_BIG_ENDIAN) {
2507 val = bswap16(val);
2508 }
2509 #endif
2510 } else {
2511 /* RAM case */
2512 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2513 & TARGET_PAGE_MASK)
2514 + addr1);
2515 switch (endian) {
2516 case DEVICE_LITTLE_ENDIAN:
2517 val = lduw_le_p(ptr);
2518 break;
2519 case DEVICE_BIG_ENDIAN:
2520 val = lduw_be_p(ptr);
2521 break;
2522 default:
2523 val = lduw_p(ptr);
2524 break;
2525 }
2526 }
2527 return val;
2528 }
2529
2530 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2531 {
2532 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2533 }
2534
2535 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2536 {
2537 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2538 }
2539
2540 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2541 {
2542 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2543 }
2544
2545 /* warning: addr must be aligned. The ram page is not masked as dirty
2546 and the code inside is not invalidated. It is useful if the dirty
2547 bits are used to track modified PTEs */
2548 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2549 {
2550 uint8_t *ptr;
2551 MemoryRegion *mr;
2552 hwaddr l = 4;
2553 hwaddr addr1;
2554
2555 mr = address_space_translate(as, addr, &addr1, &l,
2556 true);
2557 if (l < 4 || !memory_access_is_direct(mr, true)) {
2558 io_mem_write(mr, addr1, val, 4);
2559 } else {
2560 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2561 ptr = qemu_get_ram_ptr(addr1);
2562 stl_p(ptr, val);
2563
2564 if (unlikely(in_migration)) {
2565 if (cpu_physical_memory_is_clean(addr1)) {
2566 /* invalidate code */
2567 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2568 /* set dirty bit */
2569 cpu_physical_memory_set_dirty_flag(addr1,
2570 DIRTY_MEMORY_MIGRATION);
2571 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2572 }
2573 }
2574 }
2575 }
2576
2577 /* warning: addr must be aligned */
2578 static inline void stl_phys_internal(AddressSpace *as,
2579 hwaddr addr, uint32_t val,
2580 enum device_endian endian)
2581 {
2582 uint8_t *ptr;
2583 MemoryRegion *mr;
2584 hwaddr l = 4;
2585 hwaddr addr1;
2586
2587 mr = address_space_translate(as, addr, &addr1, &l,
2588 true);
2589 if (l < 4 || !memory_access_is_direct(mr, true)) {
2590 #if defined(TARGET_WORDS_BIGENDIAN)
2591 if (endian == DEVICE_LITTLE_ENDIAN) {
2592 val = bswap32(val);
2593 }
2594 #else
2595 if (endian == DEVICE_BIG_ENDIAN) {
2596 val = bswap32(val);
2597 }
2598 #endif
2599 io_mem_write(mr, addr1, val, 4);
2600 } else {
2601 /* RAM case */
2602 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2603 ptr = qemu_get_ram_ptr(addr1);
2604 switch (endian) {
2605 case DEVICE_LITTLE_ENDIAN:
2606 stl_le_p(ptr, val);
2607 break;
2608 case DEVICE_BIG_ENDIAN:
2609 stl_be_p(ptr, val);
2610 break;
2611 default:
2612 stl_p(ptr, val);
2613 break;
2614 }
2615 invalidate_and_set_dirty(addr1, 4);
2616 }
2617 }
2618
2619 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2620 {
2621 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2622 }
2623
2624 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2625 {
2626 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2627 }
2628
2629 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2630 {
2631 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2632 }
2633
2634 /* XXX: optimize */
2635 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2636 {
2637 uint8_t v = val;
2638 address_space_rw(as, addr, &v, 1, 1);
2639 }
2640
2641 /* warning: addr must be aligned */
2642 static inline void stw_phys_internal(AddressSpace *as,
2643 hwaddr addr, uint32_t val,
2644 enum device_endian endian)
2645 {
2646 uint8_t *ptr;
2647 MemoryRegion *mr;
2648 hwaddr l = 2;
2649 hwaddr addr1;
2650
2651 mr = address_space_translate(as, addr, &addr1, &l, true);
2652 if (l < 2 || !memory_access_is_direct(mr, true)) {
2653 #if defined(TARGET_WORDS_BIGENDIAN)
2654 if (endian == DEVICE_LITTLE_ENDIAN) {
2655 val = bswap16(val);
2656 }
2657 #else
2658 if (endian == DEVICE_BIG_ENDIAN) {
2659 val = bswap16(val);
2660 }
2661 #endif
2662 io_mem_write(mr, addr1, val, 2);
2663 } else {
2664 /* RAM case */
2665 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2666 ptr = qemu_get_ram_ptr(addr1);
2667 switch (endian) {
2668 case DEVICE_LITTLE_ENDIAN:
2669 stw_le_p(ptr, val);
2670 break;
2671 case DEVICE_BIG_ENDIAN:
2672 stw_be_p(ptr, val);
2673 break;
2674 default:
2675 stw_p(ptr, val);
2676 break;
2677 }
2678 invalidate_and_set_dirty(addr1, 2);
2679 }
2680 }
2681
2682 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2683 {
2684 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2685 }
2686
2687 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2688 {
2689 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2690 }
2691
2692 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2693 {
2694 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2695 }
2696
2697 /* XXX: optimize */
2698 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2699 {
2700 val = tswap64(val);
2701 address_space_rw(as, addr, (void *) &val, 8, 1);
2702 }
2703
2704 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2705 {
2706 val = cpu_to_le64(val);
2707 address_space_rw(as, addr, (void *) &val, 8, 1);
2708 }
2709
2710 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2711 {
2712 val = cpu_to_be64(val);
2713 address_space_rw(as, addr, (void *) &val, 8, 1);
2714 }
2715
2716 /* virtual memory access for debug (includes writing to ROM) */
2717 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2718 uint8_t *buf, int len, int is_write)
2719 {
2720 int l;
2721 hwaddr phys_addr;
2722 target_ulong page;
2723
2724 while (len > 0) {
2725 page = addr & TARGET_PAGE_MASK;
2726 phys_addr = cpu_get_phys_page_debug(cpu, page);
2727 /* if no physical page mapped, return an error */
2728 if (phys_addr == -1)
2729 return -1;
2730 l = (page + TARGET_PAGE_SIZE) - addr;
2731 if (l > len)
2732 l = len;
2733 phys_addr += (addr & ~TARGET_PAGE_MASK);
2734 if (is_write) {
2735 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2736 } else {
2737 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2738 }
2739 len -= l;
2740 buf += l;
2741 addr += l;
2742 }
2743 return 0;
2744 }
2745 #endif
2746
2747 #if !defined(CONFIG_USER_ONLY)
2748
/*
 * Helper for the _utterly broken_ virtio device model: reports whether the
 * target was built big endian (TARGET_WORDS_BIGENDIAN defined).
 * Don't do this at home kids!
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
2762
2763 #endif
2764
2765 #ifndef CONFIG_USER_ONLY
2766 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2767 {
2768 MemoryRegion*mr;
2769 hwaddr l = 1;
2770
2771 mr = address_space_translate(&address_space_memory,
2772 phys_addr, &phys_addr, &l, false);
2773
2774 return !(memory_region_is_ram(mr) ||
2775 memory_region_is_romd(mr));
2776 }
2777
2778 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2779 {
2780 RAMBlock *block;
2781
2782 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2783 func(block->host, block->offset, block->length, opaque);
2784 }
2785 }
2786 #endif