exec: Respect as_translate_internal length clamp
1 /*
2 * Virtual page mapping
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifndef _WIN32
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #endif
24
25 #include "qemu-common.h"
26 #include "cpu.h"
27 #include "tcg.h"
28 #include "hw/hw.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #endif
32 #include "hw/qdev.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
47 #include "trace.h"
48 #endif
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "exec/cputlb.h"
52 #include "translate-all.h"
53
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
56
57 #include "qemu/range.h"
58
59 //#define DEBUG_SUBPAGE
60
61 #if !defined(CONFIG_USER_ONLY)
62 static bool in_migration;
63
64 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
66 */
67 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
68
69 static MemoryRegion *system_memory;
70 static MemoryRegion *system_io;
71
72 AddressSpace address_space_io;
73 AddressSpace address_space_memory;
74
75 MemoryRegion io_mem_rom, io_mem_notdirty;
76 static MemoryRegion io_mem_unassigned;
77
78 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79 #define RAM_PREALLOC (1 << 0)
80
81 /* RAM is mmap-ed with MAP_SHARED */
82 #define RAM_SHARED (1 << 1)
83
84 /* Only a portion of RAM (used_length) is actually used and migrated.
85  * This used_length can change across reboots.
86 */
87 #define RAM_RESIZEABLE (1 << 2)
88
89 #endif
90
91 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
92 /* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
94 DEFINE_TLS(CPUState *, current_cpu);
95 /* 0 = Do not count executed instructions.
96 1 = Precise instruction counting.
97 2 = Adaptive rate instruction counting. */
98 int use_icount;
99
100 #if !defined(CONFIG_USER_ONLY)
101
102 typedef struct PhysPageEntry PhysPageEntry;
103
104 struct PhysPageEntry {
105     /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
106 uint32_t skip : 6;
107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
108 uint32_t ptr : 26;
109 };
110
111 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
112
113 /* Size of the L2 (and L3, etc) page tables. */
114 #define ADDR_SPACE_BITS 64
115
116 #define P_L2_BITS 9
117 #define P_L2_SIZE (1 << P_L2_BITS)
118
119 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
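/*
 * Worked example (illustrative; assumes TARGET_PAGE_BITS == 12, i.e. 4 KiB
 * target pages - other targets differ):
 *
 *     P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = (51 / 9) + 1 = 5 + 1 = 6
 *
 * Six levels of 9-bit tables index 54 bits, enough to cover the 52-bit
 * page-frame-number space that remains after stripping the page offset.
 */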
120
121 typedef PhysPageEntry Node[P_L2_SIZE];
122
123 typedef struct PhysPageMap {
124 struct rcu_head rcu;
125
126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132 } PhysPageMap;
133
134 struct AddressSpaceDispatch {
135 struct rcu_head rcu;
136
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
139 */
140 PhysPageEntry phys_map;
141 PhysPageMap map;
142 AddressSpace *as;
143 };
144
145 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146 typedef struct subpage_t {
147 MemoryRegion iomem;
148 AddressSpace *as;
149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151 } subpage_t;
152
153 #define PHYS_SECTION_UNASSIGNED 0
154 #define PHYS_SECTION_NOTDIRTY 1
155 #define PHYS_SECTION_ROM 2
156 #define PHYS_SECTION_WATCH 3
157
158 static void io_mem_init(void);
159 static void memory_map_init(void);
160 static void tcg_commit(MemoryListener *listener);
161
162 static MemoryRegion io_mem_watch;
163 #endif
164
165 #if !defined(CONFIG_USER_ONLY)
166
167 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
168 {
169 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
171 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
172 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
173 }
174 }
175
176 static uint32_t phys_map_node_alloc(PhysPageMap *map)
177 {
178 unsigned i;
179 uint32_t ret;
180
181 ret = map->nodes_nb++;
182 assert(ret != PHYS_MAP_NODE_NIL);
183 assert(ret != map->nodes_nb_alloc);
184 for (i = 0; i < P_L2_SIZE; ++i) {
185 map->nodes[ret][i].skip = 1;
186 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
187 }
188 return ret;
189 }
190
191 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
192 hwaddr *index, hwaddr *nb, uint16_t leaf,
193 int level)
194 {
195 PhysPageEntry *p;
196 int i;
197 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
198
199 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
200 lp->ptr = phys_map_node_alloc(map);
201 p = map->nodes[lp->ptr];
202 if (level == 0) {
203 for (i = 0; i < P_L2_SIZE; i++) {
204 p[i].skip = 0;
205 p[i].ptr = PHYS_SECTION_UNASSIGNED;
206 }
207 }
208 } else {
209 p = map->nodes[lp->ptr];
210 }
211 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
212
213 while (*nb && lp < &p[P_L2_SIZE]) {
214 if ((*index & (step - 1)) == 0 && *nb >= step) {
215 lp->skip = 0;
216 lp->ptr = leaf;
217 *index += step;
218 *nb -= step;
219 } else {
220 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
221 }
222 ++lp;
223 }
224 }
225
226 static void phys_page_set(AddressSpaceDispatch *d,
227 hwaddr index, hwaddr nb,
228 uint16_t leaf)
229 {
230 /* Wildly overreserve - it doesn't matter much. */
231 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
232
233 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
234 }
235
236 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
238 */
239 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
240 {
241 unsigned valid_ptr = P_L2_SIZE;
242 int valid = 0;
243 PhysPageEntry *p;
244 int i;
245
246 if (lp->ptr == PHYS_MAP_NODE_NIL) {
247 return;
248 }
249
250 p = nodes[lp->ptr];
251 for (i = 0; i < P_L2_SIZE; i++) {
252 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
253 continue;
254 }
255
256 valid_ptr = i;
257 valid++;
258 if (p[i].skip) {
259 phys_page_compact(&p[i], nodes, compacted);
260 }
261 }
262
263 /* We can only compress if there's only one child. */
264 if (valid != 1) {
265 return;
266 }
267
268 assert(valid_ptr < P_L2_SIZE);
269
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
272 return;
273 }
274
275 lp->ptr = p[valid_ptr].ptr;
276 if (!p[valid_ptr].skip) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
281 * change this rule.
282 */
283 lp->skip = 0;
284 } else {
285 lp->skip += p[valid_ptr].skip;
286 }
287 }
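/*
 * Compaction sketch (illustrative): if a node's only valid child entry has
 * ptr = N and skip = 2, the parent entry is rewritten to ptr = N and
 * skip = parent_skip + 2, so a later phys_page_find() jumps past the folded
 * levels in a single step instead of loading the intermediate nodes.
 */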
288
289 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
290 {
291 DECLARE_BITMAP(compacted, nodes_nb);
292
293 if (d->phys_map.skip) {
294 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
295 }
296 }
297
298 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
299 Node *nodes, MemoryRegionSection *sections)
300 {
301 PhysPageEntry *p;
302 hwaddr index = addr >> TARGET_PAGE_BITS;
303 int i;
304
305 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
306 if (lp.ptr == PHYS_MAP_NODE_NIL) {
307 return &sections[PHYS_SECTION_UNASSIGNED];
308 }
309 p = nodes[lp.ptr];
310 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
311 }
312
313 if (sections[lp.ptr].size.hi ||
314 range_covers_byte(sections[lp.ptr].offset_within_address_space,
315 sections[lp.ptr].size.lo, addr)) {
316 return &sections[lp.ptr];
317 } else {
318 return &sections[PHYS_SECTION_UNASSIGNED];
319 }
320 }
321
322 bool memory_region_is_unassigned(MemoryRegion *mr)
323 {
324 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
325 && mr != &io_mem_watch;
326 }
327
328 /* Called from RCU critical section */
329 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
330 hwaddr addr,
331 bool resolve_subpage)
332 {
333 MemoryRegionSection *section;
334 subpage_t *subpage;
335
336 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
337 if (resolve_subpage && section->mr->subpage) {
338 subpage = container_of(section->mr, subpage_t, iomem);
339 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
340 }
341 return section;
342 }
343
344 /* Called from RCU critical section */
345 static MemoryRegionSection *
346 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
347 hwaddr *plen, bool resolve_subpage)
348 {
349 MemoryRegionSection *section;
350 Int128 diff;
351
352 section = address_space_lookup_region(d, addr, resolve_subpage);
353 /* Compute offset within MemoryRegionSection */
354 addr -= section->offset_within_address_space;
355
356 /* Compute offset within MemoryRegion */
357 *xlat = addr + section->offset_within_region;
358
359 diff = int128_sub(section->mr->size, int128_make64(addr));
360 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
361 return section;
362 }
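/*
 * Length-clamp example (illustrative numbers, offset_within_region == 0):
 * for a 0x1000-byte section mapped at address-space offset 0x1000, a call
 * with addr = 0x1f80 and *plen = 0x200 yields *xlat = 0xf80 and
 * diff = 0x1000 - 0xf80 = 0x80, so *plen is clamped to 0x80.  Callers thus
 * never see a length that runs past the end of the returned section.
 */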
363
364 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
365 {
366 if (memory_region_is_ram(mr)) {
367 return !(is_write && mr->readonly);
368 }
369 if (memory_region_is_romd(mr)) {
370 return !is_write;
371 }
372
373 return false;
374 }
375
376 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
377 hwaddr *xlat, hwaddr *plen,
378 bool is_write)
379 {
380 IOMMUTLBEntry iotlb;
381 MemoryRegionSection *section;
382 MemoryRegion *mr;
383
384 rcu_read_lock();
385 for (;;) {
386 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
387 section = address_space_translate_internal(d, addr, &addr, plen, true);
388 mr = section->mr;
389
390 if (!mr->iommu_ops) {
391 break;
392 }
393
394 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
395 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
396 | (addr & iotlb.addr_mask));
397 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
398 if (!(iotlb.perm & (1 << is_write))) {
399 mr = &io_mem_unassigned;
400 break;
401 }
402
403 as = iotlb.target_as;
404 }
405
406 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
407 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
408 *plen = MIN(page, *plen);
409 }
410
411 *xlat = addr;
412 rcu_read_unlock();
413 return mr;
414 }
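/*
 * IOMMU step example (illustrative numbers): with addr_mask = 0xfff (a
 * 4 KiB IOMMU page), translated_addr = 0x80002000 and an incoming
 * addr = 0x1234, the loop above produces addr = 0x80002234 and clamps a
 * larger incoming *plen down to (addr | 0xfff) - addr + 1 = 0xdcc, so the
 * access also never crosses the IOMMU page that provided the translation.
 */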
415
416 /* Called from RCU critical section */
417 MemoryRegionSection *
418 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
419 hwaddr *xlat, hwaddr *plen)
420 {
421 MemoryRegionSection *section;
422 section = address_space_translate_internal(cpu->memory_dispatch,
423 addr, xlat, plen, false);
424
425 assert(!section->mr->iommu_ops);
426 return section;
427 }
428 #endif
429
430 #if !defined(CONFIG_USER_ONLY)
431
432 static int cpu_common_post_load(void *opaque, int version_id)
433 {
434 CPUState *cpu = opaque;
435
436 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
437 version_id is increased. */
438 cpu->interrupt_request &= ~0x01;
439 tlb_flush(cpu, 1);
440
441 return 0;
442 }
443
444 static int cpu_common_pre_load(void *opaque)
445 {
446 CPUState *cpu = opaque;
447
448 cpu->exception_index = -1;
449
450 return 0;
451 }
452
453 static bool cpu_common_exception_index_needed(void *opaque)
454 {
455 CPUState *cpu = opaque;
456
457 return tcg_enabled() && cpu->exception_index != -1;
458 }
459
460 static const VMStateDescription vmstate_cpu_common_exception_index = {
461 .name = "cpu_common/exception_index",
462 .version_id = 1,
463 .minimum_version_id = 1,
464 .fields = (VMStateField[]) {
465 VMSTATE_INT32(exception_index, CPUState),
466 VMSTATE_END_OF_LIST()
467 }
468 };
469
470 const VMStateDescription vmstate_cpu_common = {
471 .name = "cpu_common",
472 .version_id = 1,
473 .minimum_version_id = 1,
474 .pre_load = cpu_common_pre_load,
475 .post_load = cpu_common_post_load,
476 .fields = (VMStateField[]) {
477 VMSTATE_UINT32(halted, CPUState),
478 VMSTATE_UINT32(interrupt_request, CPUState),
479 VMSTATE_END_OF_LIST()
480 },
481 .subsections = (VMStateSubsection[]) {
482 {
483 .vmsd = &vmstate_cpu_common_exception_index,
484 .needed = cpu_common_exception_index_needed,
485 } , {
486 /* empty */
487 }
488 }
489 };
490
491 #endif
492
493 CPUState *qemu_get_cpu(int index)
494 {
495 CPUState *cpu;
496
497 CPU_FOREACH(cpu) {
498 if (cpu->cpu_index == index) {
499 return cpu;
500 }
501 }
502
503 return NULL;
504 }
505
506 #if !defined(CONFIG_USER_ONLY)
507 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
508 {
509 /* We only support one address space per cpu at the moment. */
510 assert(cpu->as == as);
511
512 if (cpu->tcg_as_listener) {
513 memory_listener_unregister(cpu->tcg_as_listener);
514 } else {
515 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
516 }
517 cpu->tcg_as_listener->commit = tcg_commit;
518 memory_listener_register(cpu->tcg_as_listener, as);
519 }
520 #endif
521
522 void cpu_exec_init(CPUArchState *env)
523 {
524 CPUState *cpu = ENV_GET_CPU(env);
525 CPUClass *cc = CPU_GET_CLASS(cpu);
526 CPUState *some_cpu;
527 int cpu_index;
528
529 #if defined(CONFIG_USER_ONLY)
530 cpu_list_lock();
531 #endif
532 cpu_index = 0;
533 CPU_FOREACH(some_cpu) {
534 cpu_index++;
535 }
536 cpu->cpu_index = cpu_index;
537 cpu->numa_node = 0;
538 QTAILQ_INIT(&cpu->breakpoints);
539 QTAILQ_INIT(&cpu->watchpoints);
540 #ifndef CONFIG_USER_ONLY
541 cpu->as = &address_space_memory;
542 cpu->thread_id = qemu_get_thread_id();
543 cpu_reload_memory_map(cpu);
544 #endif
545 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
546 #if defined(CONFIG_USER_ONLY)
547 cpu_list_unlock();
548 #endif
549 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
550 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
551 }
552 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
553 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
554 cpu_save, cpu_load, env);
555 assert(cc->vmsd == NULL);
556 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
557 #endif
558 if (cc->vmsd != NULL) {
559 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
560 }
561 }
562
563 #if defined(CONFIG_USER_ONLY)
564 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
565 {
566 tb_invalidate_phys_page_range(pc, pc + 1, 0);
567 }
568 #else
569 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
570 {
571 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
572 if (phys != -1) {
573 tb_invalidate_phys_addr(cpu->as,
574 phys | (pc & ~TARGET_PAGE_MASK));
575 }
576 }
577 #endif
578
579 #if defined(CONFIG_USER_ONLY)
580 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
581
582 {
583 }
584
585 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
586 int flags)
587 {
588 return -ENOSYS;
589 }
590
591 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
592 {
593 }
594
595 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
596 int flags, CPUWatchpoint **watchpoint)
597 {
598 return -ENOSYS;
599 }
600 #else
601 /* Add a watchpoint. */
602 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
603 int flags, CPUWatchpoint **watchpoint)
604 {
605 CPUWatchpoint *wp;
606
607 /* forbid ranges which are empty or run off the end of the address space */
608 if (len == 0 || (addr + len - 1) < addr) {
609 error_report("tried to set invalid watchpoint at %"
610 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
611 return -EINVAL;
612 }
613 wp = g_malloc(sizeof(*wp));
614
615 wp->vaddr = addr;
616 wp->len = len;
617 wp->flags = flags;
618
619 /* keep all GDB-injected watchpoints in front */
620 if (flags & BP_GDB) {
621 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
622 } else {
623 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
624 }
625
626 tlb_flush_page(cpu, addr);
627
628 if (watchpoint)
629 *watchpoint = wp;
630 return 0;
631 }
632
633 /* Remove a specific watchpoint. */
634 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
635 int flags)
636 {
637 CPUWatchpoint *wp;
638
639 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
640 if (addr == wp->vaddr && len == wp->len
641 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
642 cpu_watchpoint_remove_by_ref(cpu, wp);
643 return 0;
644 }
645 }
646 return -ENOENT;
647 }
648
649 /* Remove a specific watchpoint by reference. */
650 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
651 {
652 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
653
654 tlb_flush_page(cpu, watchpoint->vaddr);
655
656 g_free(watchpoint);
657 }
658
659 /* Remove all matching watchpoints. */
660 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
661 {
662 CPUWatchpoint *wp, *next;
663
664 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
665 if (wp->flags & mask) {
666 cpu_watchpoint_remove_by_ref(cpu, wp);
667 }
668 }
669 }
670
671 /* Return true if this watchpoint address matches the specified
672  * access (i.e. the address range covered by the watchpoint overlaps
673 * partially or completely with the address range covered by the
674 * access).
675 */
676 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
677 vaddr addr,
678 vaddr len)
679 {
680 /* We know the lengths are non-zero, but a little caution is
681 * required to avoid errors in the case where the range ends
682 * exactly at the top of the address space and so addr + len
683 * wraps round to zero.
684 */
685 vaddr wpend = wp->vaddr + wp->len - 1;
686 vaddr addrend = addr + len - 1;
687
688 return !(addr > wpend || wp->vaddr > addrend);
689 }
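/*
 * Overlap example (illustrative; assumes a 64-bit vaddr): a watchpoint at
 * 0xfffffffffffffff0 with len 0x10 has wpend = 0xffffffffffffffff, whereas
 * a naive vaddr + len would wrap to 0.  An 8-byte access at
 * 0xfffffffffffffff8 gives addrend = 0xffffffffffffffff, so the
 * !(addr > wpend || wp->vaddr > addrend) test still reports the hit.
 */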
690
691 #endif
692
693 /* Add a breakpoint. */
694 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
695 CPUBreakpoint **breakpoint)
696 {
697 CPUBreakpoint *bp;
698
699 bp = g_malloc(sizeof(*bp));
700
701 bp->pc = pc;
702 bp->flags = flags;
703
704 /* keep all GDB-injected breakpoints in front */
705 if (flags & BP_GDB) {
706 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
707 } else {
708 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
709 }
710
711 breakpoint_invalidate(cpu, pc);
712
713 if (breakpoint) {
714 *breakpoint = bp;
715 }
716 return 0;
717 }
718
719 /* Remove a specific breakpoint. */
720 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
721 {
722 CPUBreakpoint *bp;
723
724 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
725 if (bp->pc == pc && bp->flags == flags) {
726 cpu_breakpoint_remove_by_ref(cpu, bp);
727 return 0;
728 }
729 }
730 return -ENOENT;
731 }
732
733 /* Remove a specific breakpoint by reference. */
734 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
735 {
736 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
737
738 breakpoint_invalidate(cpu, breakpoint->pc);
739
740 g_free(breakpoint);
741 }
742
743 /* Remove all matching breakpoints. */
744 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
745 {
746 CPUBreakpoint *bp, *next;
747
748 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
749 if (bp->flags & mask) {
750 cpu_breakpoint_remove_by_ref(cpu, bp);
751 }
752 }
753 }
754
755 /* Enable or disable single-step mode. EXCP_DEBUG is returned by the
756 CPU loop after each instruction */
757 void cpu_single_step(CPUState *cpu, int enabled)
758 {
759 if (cpu->singlestep_enabled != enabled) {
760 cpu->singlestep_enabled = enabled;
761 if (kvm_enabled()) {
762 kvm_update_guest_debug(cpu, 0);
763 } else {
764 /* must flush all the translated code to avoid inconsistencies */
765 /* XXX: only flush what is necessary */
766 CPUArchState *env = cpu->env_ptr;
767 tb_flush(env);
768 }
769 }
770 }
771
772 void cpu_abort(CPUState *cpu, const char *fmt, ...)
773 {
774 va_list ap;
775 va_list ap2;
776
777 va_start(ap, fmt);
778 va_copy(ap2, ap);
779 fprintf(stderr, "qemu: fatal: ");
780 vfprintf(stderr, fmt, ap);
781 fprintf(stderr, "\n");
782 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
783 if (qemu_log_enabled()) {
784 qemu_log("qemu: fatal: ");
785 qemu_log_vprintf(fmt, ap2);
786 qemu_log("\n");
787 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
788 qemu_log_flush();
789 qemu_log_close();
790 }
791 va_end(ap2);
792 va_end(ap);
793 #if defined(CONFIG_USER_ONLY)
794 {
795 struct sigaction act;
796 sigfillset(&act.sa_mask);
797 act.sa_handler = SIG_DFL;
798 sigaction(SIGABRT, &act, NULL);
799 }
800 #endif
801 abort();
802 }
803
804 #if !defined(CONFIG_USER_ONLY)
805 /* Called from RCU critical section */
806 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
807 {
808 RAMBlock *block;
809
810 block = atomic_rcu_read(&ram_list.mru_block);
811 if (block && addr - block->offset < block->max_length) {
812 goto found;
813 }
814 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
815 if (addr - block->offset < block->max_length) {
816 goto found;
817 }
818 }
819
820 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
821 abort();
822
823 found:
824 /* It is safe to write mru_block outside the iothread lock. This
825 * is what happens:
826 *
827 * mru_block = xxx
828 * rcu_read_unlock()
829 * xxx removed from list
830 * rcu_read_lock()
831 * read mru_block
832 * mru_block = NULL;
833 * call_rcu(reclaim_ramblock, xxx);
834 * rcu_read_unlock()
835 *
836 * atomic_rcu_set is not needed here. The block was already published
837 * when it was placed into the list. Here we're just making an extra
838 * copy of the pointer.
839 */
840 ram_list.mru_block = block;
841 return block;
842 }
843
844 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
845 {
846 ram_addr_t start1;
847 RAMBlock *block;
848 ram_addr_t end;
849
850 end = TARGET_PAGE_ALIGN(start + length);
851 start &= TARGET_PAGE_MASK;
852
853 rcu_read_lock();
854 block = qemu_get_ram_block(start);
855 assert(block == qemu_get_ram_block(end - 1));
856 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
857 cpu_tlb_reset_dirty_all(start1, length);
858 rcu_read_unlock();
859 }
860
861 /* Note: start and end must be within the same ram block. */
862 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
863 unsigned client)
864 {
865 if (length == 0)
866 return;
867 cpu_physical_memory_clear_dirty_range_type(start, length, client);
868
869 if (tcg_enabled()) {
870 tlb_reset_dirty_range_all(start, length);
871 }
872 }
873
874 static void cpu_physical_memory_set_dirty_tracking(bool enable)
875 {
876 in_migration = enable;
877 }
878
879 /* Called from RCU critical section */
880 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
881 MemoryRegionSection *section,
882 target_ulong vaddr,
883 hwaddr paddr, hwaddr xlat,
884 int prot,
885 target_ulong *address)
886 {
887 hwaddr iotlb;
888 CPUWatchpoint *wp;
889
890 if (memory_region_is_ram(section->mr)) {
891 /* Normal RAM. */
892 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
893 + xlat;
894 if (!section->readonly) {
895 iotlb |= PHYS_SECTION_NOTDIRTY;
896 } else {
897 iotlb |= PHYS_SECTION_ROM;
898 }
899 } else {
900 iotlb = section - section->address_space->dispatch->map.sections;
901 iotlb += xlat;
902 }
903
904 /* Make accesses to pages with watchpoints go via the
905 watchpoint trap routines. */
906 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
907 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
908 /* Avoid trapping reads of pages with a write breakpoint. */
909 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
910 iotlb = PHYS_SECTION_WATCH + paddr;
911 *address |= TLB_MMIO;
912 break;
913 }
914 }
915 }
916
917 return iotlb;
918 }
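/*
 * Encoding note (sketch): for RAM the result is a ram_addr-based value with
 * one of the PHYS_SECTION_* indices folded into the low bits; for MMIO it is
 * the section's index in d->map.sections plus xlat.  phys_section_add()
 * asserting sections_nb < TARGET_PAGE_SIZE is what keeps a section number
 * within the sub-page bits that iotlb_to_region() later recovers with
 * "index & ~TARGET_PAGE_MASK".
 */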
919 #endif /* defined(CONFIG_USER_ONLY) */
920
921 #if !defined(CONFIG_USER_ONLY)
922
923 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
924 uint16_t section);
925 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
926
927 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
928 qemu_anon_ram_alloc;
929
930 /*
931  * Set a custom physical guest memory allocator.
932 * Accelerators with unusual needs may need this. Hopefully, we can
933 * get rid of it eventually.
934 */
935 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
936 {
937 phys_mem_alloc = alloc;
938 }
939
940 static uint16_t phys_section_add(PhysPageMap *map,
941 MemoryRegionSection *section)
942 {
943 /* The physical section number is ORed with a page-aligned
944 * pointer to produce the iotlb entries. Thus it should
945 * never overflow into the page-aligned value.
946 */
947 assert(map->sections_nb < TARGET_PAGE_SIZE);
948
949 if (map->sections_nb == map->sections_nb_alloc) {
950 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
951 map->sections = g_renew(MemoryRegionSection, map->sections,
952 map->sections_nb_alloc);
953 }
954 map->sections[map->sections_nb] = *section;
955 memory_region_ref(section->mr);
956 return map->sections_nb++;
957 }
958
959 static void phys_section_destroy(MemoryRegion *mr)
960 {
961 memory_region_unref(mr);
962
963 if (mr->subpage) {
964 subpage_t *subpage = container_of(mr, subpage_t, iomem);
965 object_unref(OBJECT(&subpage->iomem));
966 g_free(subpage);
967 }
968 }
969
970 static void phys_sections_free(PhysPageMap *map)
971 {
972 while (map->sections_nb > 0) {
973 MemoryRegionSection *section = &map->sections[--map->sections_nb];
974 phys_section_destroy(section->mr);
975 }
976 g_free(map->sections);
977 g_free(map->nodes);
978 }
979
980 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
981 {
982 subpage_t *subpage;
983 hwaddr base = section->offset_within_address_space
984 & TARGET_PAGE_MASK;
985 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
986 d->map.nodes, d->map.sections);
987 MemoryRegionSection subsection = {
988 .offset_within_address_space = base,
989 .size = int128_make64(TARGET_PAGE_SIZE),
990 };
991 hwaddr start, end;
992
993 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
994
995 if (!(existing->mr->subpage)) {
996 subpage = subpage_init(d->as, base);
997 subsection.address_space = d->as;
998 subsection.mr = &subpage->iomem;
999 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1000 phys_section_add(&d->map, &subsection));
1001 } else {
1002 subpage = container_of(existing->mr, subpage_t, iomem);
1003 }
1004 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1005 end = start + int128_get64(section->size) - 1;
1006 subpage_register(subpage, start, end,
1007 phys_section_add(&d->map, section));
1008 }
1009
1010
1011 static void register_multipage(AddressSpaceDispatch *d,
1012 MemoryRegionSection *section)
1013 {
1014 hwaddr start_addr = section->offset_within_address_space;
1015 uint16_t section_index = phys_section_add(&d->map, section);
1016 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1017 TARGET_PAGE_BITS));
1018
1019 assert(num_pages);
1020 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1021 }
1022
1023 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1024 {
1025 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1026 AddressSpaceDispatch *d = as->next_dispatch;
1027 MemoryRegionSection now = *section, remain = *section;
1028 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1029
1030 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1031 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1032 - now.offset_within_address_space;
1033
1034 now.size = int128_min(int128_make64(left), now.size);
1035 register_subpage(d, &now);
1036 } else {
1037 now.size = int128_zero();
1038 }
1039 while (int128_ne(remain.size, now.size)) {
1040 remain.size = int128_sub(remain.size, now.size);
1041 remain.offset_within_address_space += int128_get64(now.size);
1042 remain.offset_within_region += int128_get64(now.size);
1043 now = remain;
1044 if (int128_lt(remain.size, page_size)) {
1045 register_subpage(d, &now);
1046 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1047 now.size = page_size;
1048 register_subpage(d, &now);
1049 } else {
1050 now.size = int128_and(now.size, int128_neg(page_size));
1051 register_multipage(d, &now);
1052 }
1053 }
1054 }
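/*
 * Split example (illustrative, 4 KiB pages assumed): a section starting at
 * 0x1800 with size 0x3000 is registered as a 0x800-byte subpage head
 * [0x1800, 0x2000), a 0x2000-byte multipage run [0x2000, 0x4000), and a
 * 0x800-byte subpage tail [0x4000, 0x4800).
 */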
1055
1056 void qemu_flush_coalesced_mmio_buffer(void)
1057 {
1058 if (kvm_enabled())
1059 kvm_flush_coalesced_mmio_buffer();
1060 }
1061
1062 void qemu_mutex_lock_ramlist(void)
1063 {
1064 qemu_mutex_lock(&ram_list.mutex);
1065 }
1066
1067 void qemu_mutex_unlock_ramlist(void)
1068 {
1069 qemu_mutex_unlock(&ram_list.mutex);
1070 }
1071
1072 #ifdef __linux__
1073
1074 #include <sys/vfs.h>
1075
1076 #define HUGETLBFS_MAGIC 0x958458f6
1077
1078 static long gethugepagesize(const char *path, Error **errp)
1079 {
1080 struct statfs fs;
1081 int ret;
1082
1083 do {
1084 ret = statfs(path, &fs);
1085 } while (ret != 0 && errno == EINTR);
1086
1087 if (ret != 0) {
1088 error_setg_errno(errp, errno, "failed to get page size of file %s",
1089 path);
1090 return 0;
1091 }
1092
1093 if (fs.f_type != HUGETLBFS_MAGIC)
1094 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1095
1096 return fs.f_bsize;
1097 }
1098
1099 static void *file_ram_alloc(RAMBlock *block,
1100 ram_addr_t memory,
1101 const char *path,
1102 Error **errp)
1103 {
1104 char *filename;
1105 char *sanitized_name;
1106 char *c;
1107 void *area = NULL;
1108 int fd;
1109 uint64_t hpagesize;
1110 Error *local_err = NULL;
1111
1112 hpagesize = gethugepagesize(path, &local_err);
1113 if (local_err) {
1114 error_propagate(errp, local_err);
1115 goto error;
1116 }
1117 block->mr->align = hpagesize;
1118
1119 if (memory < hpagesize) {
1120 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1121 "or larger than huge page size 0x%" PRIx64,
1122 memory, hpagesize);
1123 goto error;
1124 }
1125
1126 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1127 error_setg(errp,
1128 "host lacks kvm mmu notifiers, -mem-path unsupported");
1129 goto error;
1130 }
1131
1132 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1133 sanitized_name = g_strdup(memory_region_name(block->mr));
1134 for (c = sanitized_name; *c != '\0'; c++) {
1135 if (*c == '/')
1136 *c = '_';
1137 }
1138
1139 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1140 sanitized_name);
1141 g_free(sanitized_name);
1142
1143 fd = mkstemp(filename);
1144 if (fd < 0) {
1145 error_setg_errno(errp, errno,
1146 "unable to create backing store for hugepages");
1147 g_free(filename);
1148 goto error;
1149 }
1150 unlink(filename);
1151 g_free(filename);
1152
1153 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1154
1155 /*
1156 * ftruncate is not supported by hugetlbfs in older
1157 * hosts, so don't bother bailing out on errors.
1158 * If anything goes wrong with it under other filesystems,
1159 * mmap will fail.
1160 */
1161 if (ftruncate(fd, memory)) {
1162 perror("ftruncate");
1163 }
1164
1165 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1166 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1167 fd, 0);
1168 if (area == MAP_FAILED) {
1169 error_setg_errno(errp, errno,
1170 "unable to map backing store for hugepages");
1171 close(fd);
1172 goto error;
1173 }
1174
1175 if (mem_prealloc) {
1176 os_mem_prealloc(fd, area, memory);
1177 }
1178
1179 block->fd = fd;
1180 return area;
1181
1182 error:
1183 if (mem_prealloc) {
1184 error_report("%s", error_get_pretty(*errp));
1185 exit(1);
1186 }
1187 return NULL;
1188 }
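/*
 * Rounding example (illustrative): with a 2 MiB huge page size, a 3 MiB
 * request is rounded up by "(memory + hpagesize - 1) & ~(hpagesize - 1)"
 * to 4 MiB before the backing file is truncated and mmap()ed, so the file
 * always covers whole huge pages.
 */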
1189 #endif
1190
1191 /* Called with the ramlist lock held. */
1192 static ram_addr_t find_ram_offset(ram_addr_t size)
1193 {
1194 RAMBlock *block, *next_block;
1195 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1196
1197     assert(size != 0); /* it would hand out the same offset multiple times */
1198
1199 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1200 return 0;
1201 }
1202
1203 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1204 ram_addr_t end, next = RAM_ADDR_MAX;
1205
1206 end = block->offset + block->max_length;
1207
1208 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1209 if (next_block->offset >= end) {
1210 next = MIN(next, next_block->offset);
1211 }
1212 }
1213 if (next - end >= size && next - end < mingap) {
1214 offset = end;
1215 mingap = next - end;
1216 }
1217 }
1218
1219 if (offset == RAM_ADDR_MAX) {
1220 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1221 (uint64_t)size);
1222 abort();
1223 }
1224
1225 return offset;
1226 }
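/*
 * Best-fit example (illustrative): with blocks covering [0x0, 0x1000) and
 * [0x3000, 0x4000), a request for 0x1000 bytes sees two candidate gaps -
 * 0x2000 bytes at offset 0x1000 and an unbounded one at 0x4000 - and picks
 * the smallest gap that fits, returning 0x1000.
 */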
1227
1228 ram_addr_t last_ram_offset(void)
1229 {
1230 RAMBlock *block;
1231 ram_addr_t last = 0;
1232
1233 rcu_read_lock();
1234 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1235 last = MAX(last, block->offset + block->max_length);
1236 }
1237 rcu_read_unlock();
1238 return last;
1239 }
1240
1241 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1242 {
1243 int ret;
1244
1245     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core */
1246 if (!machine_dump_guest_core(current_machine)) {
1247 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1248 if (ret) {
1249 perror("qemu_madvise");
1250 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1251 "but dump_guest_core=off specified\n");
1252 }
1253 }
1254 }
1255
1256 /* Called within an RCU critical section, or while the ramlist lock
1257 * is held.
1258 */
1259 static RAMBlock *find_ram_block(ram_addr_t addr)
1260 {
1261 RAMBlock *block;
1262
1263 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1264 if (block->offset == addr) {
1265 return block;
1266 }
1267 }
1268
1269 return NULL;
1270 }
1271
1272 /* Called with iothread lock held. */
1273 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1274 {
1275 RAMBlock *new_block, *block;
1276
1277 rcu_read_lock();
1278 new_block = find_ram_block(addr);
1279 assert(new_block);
1280 assert(!new_block->idstr[0]);
1281
1282 if (dev) {
1283 char *id = qdev_get_dev_path(dev);
1284 if (id) {
1285 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1286 g_free(id);
1287 }
1288 }
1289 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1290
1291 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1292 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1293 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1294 new_block->idstr);
1295 abort();
1296 }
1297 }
1298 rcu_read_unlock();
1299 }
1300
1301 /* Called with iothread lock held. */
1302 void qemu_ram_unset_idstr(ram_addr_t addr)
1303 {
1304 RAMBlock *block;
1305
1306 /* FIXME: arch_init.c assumes that this is not called throughout
1307 * migration. Ignore the problem since hot-unplug during migration
1308 * does not work anyway.
1309 */
1310
1311 rcu_read_lock();
1312 block = find_ram_block(addr);
1313 if (block) {
1314 memset(block->idstr, 0, sizeof(block->idstr));
1315 }
1316 rcu_read_unlock();
1317 }
1318
1319 static int memory_try_enable_merging(void *addr, size_t len)
1320 {
1321 if (!machine_mem_merge(current_machine)) {
1322 /* disabled by the user */
1323 return 0;
1324 }
1325
1326 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1327 }
1328
1329 /* Only legal before guest might have detected the memory size: e.g. on
1330 * incoming migration, or right after reset.
1331 *
1332  * As the memory core doesn't know how the memory is accessed, it is up to
1333  * the resize callback to update device state and/or add assertions to detect
1334 * misuse, if necessary.
1335 */
1336 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1337 {
1338 RAMBlock *block = find_ram_block(base);
1339
1340 assert(block);
1341
1342 newsize = TARGET_PAGE_ALIGN(newsize);
1343
1344 if (block->used_length == newsize) {
1345 return 0;
1346 }
1347
1348 if (!(block->flags & RAM_RESIZEABLE)) {
1349 error_setg_errno(errp, EINVAL,
1350 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1351 " in != 0x" RAM_ADDR_FMT, block->idstr,
1352 newsize, block->used_length);
1353 return -EINVAL;
1354 }
1355
1356 if (block->max_length < newsize) {
1357 error_setg_errno(errp, EINVAL,
1358 "Length too large: %s: 0x" RAM_ADDR_FMT
1359 " > 0x" RAM_ADDR_FMT, block->idstr,
1360 newsize, block->max_length);
1361 return -EINVAL;
1362 }
1363
1364 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1365 block->used_length = newsize;
1366 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1367 memory_region_set_size(block->mr, newsize);
1368 if (block->resized) {
1369 block->resized(block->idstr, newsize, block->host);
1370 }
1371 return 0;
1372 }
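/*
 * Usage sketch (hypothetical caller; "base" is assumed to be the offset
 * returned by qemu_ram_alloc_resizeable()):
 *
 *     Error *err = NULL;
 *     if (qemu_ram_resize(base, 64 * 1024 * 1024, &err) < 0) {
 *         error_report("%s", error_get_pretty(err));
 *         error_free(err);
 *     }
 *
 * Resizing a block created without RAM_RESIZEABLE, or past its max_length,
 * fails with -EINVAL as checked above.
 */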
1373
1374 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1375 {
1376 RAMBlock *block;
1377 RAMBlock *last_block = NULL;
1378 ram_addr_t old_ram_size, new_ram_size;
1379
1380 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1381
1382 qemu_mutex_lock_ramlist();
1383 new_block->offset = find_ram_offset(new_block->max_length);
1384
1385 if (!new_block->host) {
1386 if (xen_enabled()) {
1387 xen_ram_alloc(new_block->offset, new_block->max_length,
1388 new_block->mr);
1389 } else {
1390 new_block->host = phys_mem_alloc(new_block->max_length,
1391 &new_block->mr->align);
1392 if (!new_block->host) {
1393 error_setg_errno(errp, errno,
1394 "cannot set up guest memory '%s'",
1395 memory_region_name(new_block->mr));
1396 qemu_mutex_unlock_ramlist();
1397 return -1;
1398 }
1399 memory_try_enable_merging(new_block->host, new_block->max_length);
1400 }
1401 }
1402
1403 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1404 * QLIST (which has an RCU-friendly variant) does not have insertion at
1405 * tail, so save the last element in last_block.
1406 */
1407 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1408 last_block = block;
1409 if (block->max_length < new_block->max_length) {
1410 break;
1411 }
1412 }
1413 if (block) {
1414 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1415 } else if (last_block) {
1416 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1417 } else { /* list is empty */
1418 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1419 }
1420 ram_list.mru_block = NULL;
1421
1422 /* Write list before version */
1423 smp_wmb();
1424 ram_list.version++;
1425 qemu_mutex_unlock_ramlist();
1426
1427 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1428
1429 if (new_ram_size > old_ram_size) {
1430 int i;
1431
1432 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1433 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1434 ram_list.dirty_memory[i] =
1435 bitmap_zero_extend(ram_list.dirty_memory[i],
1436 old_ram_size, new_ram_size);
1437 }
1438 }
1439 cpu_physical_memory_set_dirty_range(new_block->offset,
1440 new_block->used_length);
1441
1442 if (new_block->host) {
1443 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1444 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1445 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1446 if (kvm_enabled()) {
1447 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1448 }
1449 }
1450
1451 return new_block->offset;
1452 }
1453
1454 #ifdef __linux__
1455 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1456 bool share, const char *mem_path,
1457 Error **errp)
1458 {
1459 RAMBlock *new_block;
1460 ram_addr_t addr;
1461 Error *local_err = NULL;
1462
1463 if (xen_enabled()) {
1464 error_setg(errp, "-mem-path not supported with Xen");
1465 return -1;
1466 }
1467
1468 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1469 /*
1470 * file_ram_alloc() needs to allocate just like
1471 * phys_mem_alloc, but we haven't bothered to provide
1472 * a hook there.
1473 */
1474 error_setg(errp,
1475 "-mem-path not supported with this accelerator");
1476 return -1;
1477 }
1478
1479 size = TARGET_PAGE_ALIGN(size);
1480 new_block = g_malloc0(sizeof(*new_block));
1481 new_block->mr = mr;
1482 new_block->used_length = size;
1483 new_block->max_length = size;
1484 new_block->flags = share ? RAM_SHARED : 0;
1485 new_block->host = file_ram_alloc(new_block, size,
1486 mem_path, errp);
1487 if (!new_block->host) {
1488 g_free(new_block);
1489 return -1;
1490 }
1491
1492 addr = ram_block_add(new_block, &local_err);
1493 if (local_err) {
1494 g_free(new_block);
1495 error_propagate(errp, local_err);
1496 return -1;
1497 }
1498 return addr;
1499 }
1500 #endif
1501
1502 static
1503 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1504 void (*resized)(const char*,
1505 uint64_t length,
1506 void *host),
1507 void *host, bool resizeable,
1508 MemoryRegion *mr, Error **errp)
1509 {
1510 RAMBlock *new_block;
1511 ram_addr_t addr;
1512 Error *local_err = NULL;
1513
1514 size = TARGET_PAGE_ALIGN(size);
1515 max_size = TARGET_PAGE_ALIGN(max_size);
1516 new_block = g_malloc0(sizeof(*new_block));
1517 new_block->mr = mr;
1518 new_block->resized = resized;
1519 new_block->used_length = size;
1520 new_block->max_length = max_size;
1521 assert(max_size >= size);
1522 new_block->fd = -1;
1523 new_block->host = host;
1524 if (host) {
1525 new_block->flags |= RAM_PREALLOC;
1526 }
1527 if (resizeable) {
1528 new_block->flags |= RAM_RESIZEABLE;
1529 }
1530 addr = ram_block_add(new_block, &local_err);
1531 if (local_err) {
1532 g_free(new_block);
1533 error_propagate(errp, local_err);
1534 return -1;
1535 }
1536 return addr;
1537 }
1538
1539 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1540 MemoryRegion *mr, Error **errp)
1541 {
1542 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1543 }
1544
1545 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1546 {
1547 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1548 }
1549
1550 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1551 void (*resized)(const char*,
1552 uint64_t length,
1553 void *host),
1554 MemoryRegion *mr, Error **errp)
1555 {
1556 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1557 }
1558
1559 void qemu_ram_free_from_ptr(ram_addr_t addr)
1560 {
1561 RAMBlock *block;
1562
1563 qemu_mutex_lock_ramlist();
1564 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1565 if (addr == block->offset) {
1566 QLIST_REMOVE_RCU(block, next);
1567 ram_list.mru_block = NULL;
1568 /* Write list before version */
1569 smp_wmb();
1570 ram_list.version++;
1571 g_free_rcu(block, rcu);
1572 break;
1573 }
1574 }
1575 qemu_mutex_unlock_ramlist();
1576 }
1577
1578 static void reclaim_ramblock(RAMBlock *block)
1579 {
1580 if (block->flags & RAM_PREALLOC) {
1581 ;
1582 } else if (xen_enabled()) {
1583 xen_invalidate_map_cache_entry(block->host);
1584 #ifndef _WIN32
1585 } else if (block->fd >= 0) {
1586 munmap(block->host, block->max_length);
1587 close(block->fd);
1588 #endif
1589 } else {
1590 qemu_anon_ram_free(block->host, block->max_length);
1591 }
1592 g_free(block);
1593 }
1594
1595 void qemu_ram_free(ram_addr_t addr)
1596 {
1597 RAMBlock *block;
1598
1599 qemu_mutex_lock_ramlist();
1600 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1601 if (addr == block->offset) {
1602 QLIST_REMOVE_RCU(block, next);
1603 ram_list.mru_block = NULL;
1604 /* Write list before version */
1605 smp_wmb();
1606 ram_list.version++;
1607 call_rcu(block, reclaim_ramblock, rcu);
1608 break;
1609 }
1610 }
1611 qemu_mutex_unlock_ramlist();
1612 }
1613
1614 #ifndef _WIN32
1615 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1616 {
1617 RAMBlock *block;
1618 ram_addr_t offset;
1619 int flags;
1620 void *area, *vaddr;
1621
1622 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1623 offset = addr - block->offset;
1624 if (offset < block->max_length) {
1625 vaddr = ramblock_ptr(block, offset);
1626 if (block->flags & RAM_PREALLOC) {
1627 ;
1628 } else if (xen_enabled()) {
1629 abort();
1630 } else {
1631 flags = MAP_FIXED;
1632 if (block->fd >= 0) {
1633 flags |= (block->flags & RAM_SHARED ?
1634 MAP_SHARED : MAP_PRIVATE);
1635 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1636 flags, block->fd, offset);
1637 } else {
1638 /*
1639 * Remap needs to match alloc. Accelerators that
1640 * set phys_mem_alloc never remap. If they did,
1641 * we'd need a remap hook here.
1642 */
1643 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1644
1645 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1646 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1647 flags, -1, 0);
1648 }
1649 if (area != vaddr) {
1650 fprintf(stderr, "Could not remap addr: "
1651 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1652 length, addr);
1653 exit(1);
1654 }
1655 memory_try_enable_merging(vaddr, length);
1656 qemu_ram_setup_dump(vaddr, length);
1657 }
1658 }
1659 }
1660 }
1661 #endif /* !_WIN32 */
1662
1663 int qemu_get_ram_fd(ram_addr_t addr)
1664 {
1665 RAMBlock *block;
1666 int fd;
1667
1668 rcu_read_lock();
1669 block = qemu_get_ram_block(addr);
1670 fd = block->fd;
1671 rcu_read_unlock();
1672 return fd;
1673 }
1674
1675 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1676 {
1677 RAMBlock *block;
1678 void *ptr;
1679
1680 rcu_read_lock();
1681 block = qemu_get_ram_block(addr);
1682 ptr = ramblock_ptr(block, 0);
1683 rcu_read_unlock();
1684 return ptr;
1685 }
1686
1687 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1688 * This should not be used for general purpose DMA. Use address_space_map
1689 * or address_space_rw instead. For local memory (e.g. video ram) that the
1690 * device owns, use memory_region_get_ram_ptr.
1691 *
1692 * By the time this function returns, the returned pointer is not protected
1693 * by RCU anymore. If the caller is not within an RCU critical section and
1694 * does not hold the iothread lock, it must have other means of protecting the
1695 * pointer, such as a reference to the region that includes the incoming
1696 * ram_addr_t.
1697 */
1698 void *qemu_get_ram_ptr(ram_addr_t addr)
1699 {
1700 RAMBlock *block;
1701 void *ptr;
1702
1703 rcu_read_lock();
1704 block = qemu_get_ram_block(addr);
1705
1706 if (xen_enabled() && block->host == NULL) {
1707 /* We need to check if the requested address is in the RAM
1708 * because we don't want to map the entire memory in QEMU.
1709 * In that case just map until the end of the page.
1710 */
1711 if (block->offset == 0) {
1712 ptr = xen_map_cache(addr, 0, 0);
1713 goto unlock;
1714 }
1715
1716 block->host = xen_map_cache(block->offset, block->max_length, 1);
1717 }
1718 ptr = ramblock_ptr(block, addr - block->offset);
1719
1720 unlock:
1721 rcu_read_unlock();
1722 return ptr;
1723 }
1724
1725 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1726 * but takes a size argument.
1727 *
1728 * By the time this function returns, the returned pointer is not protected
1729 * by RCU anymore. If the caller is not within an RCU critical section and
1730 * does not hold the iothread lock, it must have other means of protecting the
1731 * pointer, such as a reference to the region that includes the incoming
1732 * ram_addr_t.
1733 */
1734 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1735 {
1736 void *ptr;
1737 if (*size == 0) {
1738 return NULL;
1739 }
1740 if (xen_enabled()) {
1741 return xen_map_cache(addr, *size, 1);
1742 } else {
1743 RAMBlock *block;
1744 rcu_read_lock();
1745 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1746 if (addr - block->offset < block->max_length) {
1747 if (addr - block->offset + *size > block->max_length)
1748 *size = block->max_length - addr + block->offset;
1749 ptr = ramblock_ptr(block, addr - block->offset);
1750 rcu_read_unlock();
1751 return ptr;
1752 }
1753 }
1754
1755 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1756 abort();
1757 }
1758 }
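/*
 * Clamp example (illustrative): for a block at offset 0x40000 with
 * max_length 0x10000, a request for 0x200 bytes at addr 0x4ff00 is clamped
 * to *size = 0x100, so the returned pointer never spans past the end of
 * the block.
 */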
1759
1760 /* Some of the softmmu routines need to translate from a host pointer
1761 * (typically a TLB entry) back to a ram offset.
1762 *
1763 * By the time this function returns, the returned pointer is not protected
1764 * by RCU anymore. If the caller is not within an RCU critical section and
1765 * does not hold the iothread lock, it must have other means of protecting the
1766 * pointer, such as a reference to the region that includes the incoming
1767 * ram_addr_t.
1768 */
1769 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1770 {
1771 RAMBlock *block;
1772 uint8_t *host = ptr;
1773 MemoryRegion *mr;
1774
1775 if (xen_enabled()) {
1776 rcu_read_lock();
1777 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1778 mr = qemu_get_ram_block(*ram_addr)->mr;
1779 rcu_read_unlock();
1780 return mr;
1781 }
1782
1783 rcu_read_lock();
1784 block = atomic_rcu_read(&ram_list.mru_block);
1785 if (block && block->host && host - block->host < block->max_length) {
1786 goto found;
1787 }
1788
1789 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1790         /* This case happens when the block is not mapped. */
1791 if (block->host == NULL) {
1792 continue;
1793 }
1794 if (host - block->host < block->max_length) {
1795 goto found;
1796 }
1797 }
1798
1799 rcu_read_unlock();
1800 return NULL;
1801
1802 found:
1803 *ram_addr = block->offset + (host - block->host);
1804 mr = block->mr;
1805 rcu_read_unlock();
1806 return mr;
1807 }
1808
1809 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1810 uint64_t val, unsigned size)
1811 {
1812 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1813 tb_invalidate_phys_page_fast(ram_addr, size);
1814 }
1815 switch (size) {
1816 case 1:
1817 stb_p(qemu_get_ram_ptr(ram_addr), val);
1818 break;
1819 case 2:
1820 stw_p(qemu_get_ram_ptr(ram_addr), val);
1821 break;
1822 case 4:
1823 stl_p(qemu_get_ram_ptr(ram_addr), val);
1824 break;
1825 default:
1826 abort();
1827 }
1828 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1829 /* we remove the notdirty callback only if the code has been
1830 flushed */
1831 if (!cpu_physical_memory_is_clean(ram_addr)) {
1832 CPUArchState *env = current_cpu->env_ptr;
1833 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1834 }
1835 }
1836
1837 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1838 unsigned size, bool is_write)
1839 {
1840 return is_write;
1841 }
1842
1843 static const MemoryRegionOps notdirty_mem_ops = {
1844 .write = notdirty_mem_write,
1845 .valid.accepts = notdirty_mem_accepts,
1846 .endianness = DEVICE_NATIVE_ENDIAN,
1847 };
1848
1849 /* Generate a debug exception if a watchpoint has been hit. */
1850 static void check_watchpoint(int offset, int len, int flags)
1851 {
1852 CPUState *cpu = current_cpu;
1853 CPUArchState *env = cpu->env_ptr;
1854 target_ulong pc, cs_base;
1855 target_ulong vaddr;
1856 CPUWatchpoint *wp;
1857 int cpu_flags;
1858
1859 if (cpu->watchpoint_hit) {
1860 /* We re-entered the check after replacing the TB. Now raise
1861          * the debug interrupt so that it will trigger after the
1862 * current instruction. */
1863 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1864 return;
1865 }
1866 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1867 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1868 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1869 && (wp->flags & flags)) {
1870 if (flags == BP_MEM_READ) {
1871 wp->flags |= BP_WATCHPOINT_HIT_READ;
1872 } else {
1873 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1874 }
1875 wp->hitaddr = vaddr;
1876 if (!cpu->watchpoint_hit) {
1877 cpu->watchpoint_hit = wp;
1878 tb_check_watchpoint(cpu);
1879 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1880 cpu->exception_index = EXCP_DEBUG;
1881 cpu_loop_exit(cpu);
1882 } else {
1883 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1884 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1885 cpu_resume_from_signal(cpu, NULL);
1886 }
1887 }
1888 } else {
1889 wp->flags &= ~BP_WATCHPOINT_HIT;
1890 }
1891 }
1892 }
1893
1894 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1895 so these check for a hit then pass through to the normal out-of-line
1896 phys routines. */
1897 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1898 unsigned size)
1899 {
1900 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1901 switch (size) {
1902 case 1: return ldub_phys(&address_space_memory, addr);
1903 case 2: return lduw_phys(&address_space_memory, addr);
1904 case 4: return ldl_phys(&address_space_memory, addr);
1905 default: abort();
1906 }
1907 }
1908
1909 static void watch_mem_write(void *opaque, hwaddr addr,
1910 uint64_t val, unsigned size)
1911 {
1912 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1913 switch (size) {
1914 case 1:
1915 stb_phys(&address_space_memory, addr, val);
1916 break;
1917 case 2:
1918 stw_phys(&address_space_memory, addr, val);
1919 break;
1920 case 4:
1921 stl_phys(&address_space_memory, addr, val);
1922 break;
1923 default: abort();
1924 }
1925 }
1926
1927 static const MemoryRegionOps watch_mem_ops = {
1928 .read = watch_mem_read,
1929 .write = watch_mem_write,
1930 .endianness = DEVICE_NATIVE_ENDIAN,
1931 };
1932
1933 static uint64_t subpage_read(void *opaque, hwaddr addr,
1934 unsigned len)
1935 {
1936 subpage_t *subpage = opaque;
1937 uint8_t buf[8];
1938
1939 #if defined(DEBUG_SUBPAGE)
1940 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1941 subpage, len, addr);
1942 #endif
1943 address_space_read(subpage->as, addr + subpage->base, buf, len);
1944 switch (len) {
1945 case 1:
1946 return ldub_p(buf);
1947 case 2:
1948 return lduw_p(buf);
1949 case 4:
1950 return ldl_p(buf);
1951 case 8:
1952 return ldq_p(buf);
1953 default:
1954 abort();
1955 }
1956 }
1957
1958 static void subpage_write(void *opaque, hwaddr addr,
1959 uint64_t value, unsigned len)
1960 {
1961 subpage_t *subpage = opaque;
1962 uint8_t buf[8];
1963
1964 #if defined(DEBUG_SUBPAGE)
1965 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1966 " value %"PRIx64"\n",
1967 __func__, subpage, len, addr, value);
1968 #endif
1969 switch (len) {
1970 case 1:
1971 stb_p(buf, value);
1972 break;
1973 case 2:
1974 stw_p(buf, value);
1975 break;
1976 case 4:
1977 stl_p(buf, value);
1978 break;
1979 case 8:
1980 stq_p(buf, value);
1981 break;
1982 default:
1983 abort();
1984 }
1985 address_space_write(subpage->as, addr + subpage->base, buf, len);
1986 }
1987
1988 static bool subpage_accepts(void *opaque, hwaddr addr,
1989 unsigned len, bool is_write)
1990 {
1991 subpage_t *subpage = opaque;
1992 #if defined(DEBUG_SUBPAGE)
1993 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1994 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1995 #endif
1996
1997 return address_space_access_valid(subpage->as, addr + subpage->base,
1998 len, is_write);
1999 }
2000
2001 static const MemoryRegionOps subpage_ops = {
2002 .read = subpage_read,
2003 .write = subpage_write,
2004 .impl.min_access_size = 1,
2005 .impl.max_access_size = 8,
2006 .valid.min_access_size = 1,
2007 .valid.max_access_size = 8,
2008 .valid.accepts = subpage_accepts,
2009 .endianness = DEVICE_NATIVE_ENDIAN,
2010 };
2011
2012 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2013 uint16_t section)
2014 {
2015 int idx, eidx;
2016
2017 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2018 return -1;
2019 idx = SUBPAGE_IDX(start);
2020 eidx = SUBPAGE_IDX(end);
2021 #if defined(DEBUG_SUBPAGE)
2022 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2023 __func__, mmio, start, end, idx, eidx, section);
2024 #endif
2025 for (; idx <= eidx; idx++) {
2026 mmio->sub_section[idx] = section;
2027 }
2028
2029 return 0;
2030 }
2031
2032 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2033 {
2034 subpage_t *mmio;
2035
2036 mmio = g_malloc0(sizeof(subpage_t));
2037
2038 mmio->as = as;
2039 mmio->base = base;
2040 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2041 NULL, TARGET_PAGE_SIZE);
2042 mmio->iomem.subpage = true;
2043 #if defined(DEBUG_SUBPAGE)
2044 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2045 mmio, base, TARGET_PAGE_SIZE);
2046 #endif
2047 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2048
2049 return mmio;
2050 }
2051
2052 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2053 MemoryRegion *mr)
2054 {
2055 assert(as);
2056 MemoryRegionSection section = {
2057 .address_space = as,
2058 .mr = mr,
2059 .offset_within_address_space = 0,
2060 .offset_within_region = 0,
2061 .size = int128_2_64(),
2062 };
2063
2064 return phys_section_add(map, &section);
2065 }
2066
2067 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2068 {
2069 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2070 MemoryRegionSection *sections = d->map.sections;
2071
2072 return sections[index & ~TARGET_PAGE_MASK].mr;
2073 }
2074
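/* Create the catch-all I/O regions (ROM, unassigned, notdirty, watch)
 * that back the fixed PHYS_SECTION_* entries installed by mem_begin()
 * for every AddressSpaceDispatch.
 */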
2075 static void io_mem_init(void)
2076 {
2077 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2078 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2079 NULL, UINT64_MAX);
2080 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2081 NULL, UINT64_MAX);
2082 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2083 NULL, UINT64_MAX);
2084 }
2085
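/* Listener callbacks for rebuilding an address space's dispatch tree.
 * mem_begin() starts a fresh AddressSpaceDispatch and seeds it with the
 * four dummy sections whose indexes must match PHYS_SECTION_UNASSIGNED,
 * _NOTDIRTY, _ROM and _WATCH (hence the asserts); mem_commit() compacts
 * the new tree and publishes it with RCU, freeing the old one after a
 * grace period.
 */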
2086 static void mem_begin(MemoryListener *listener)
2087 {
2088 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2089 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2090 uint16_t n;
2091
2092 n = dummy_section(&d->map, as, &io_mem_unassigned);
2093 assert(n == PHYS_SECTION_UNASSIGNED);
2094 n = dummy_section(&d->map, as, &io_mem_notdirty);
2095 assert(n == PHYS_SECTION_NOTDIRTY);
2096 n = dummy_section(&d->map, as, &io_mem_rom);
2097 assert(n == PHYS_SECTION_ROM);
2098 n = dummy_section(&d->map, as, &io_mem_watch);
2099 assert(n == PHYS_SECTION_WATCH);
2100
2101 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2102 d->as = as;
2103 as->next_dispatch = d;
2104 }
2105
2106 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2107 {
2108 phys_sections_free(&d->map);
2109 g_free(d);
2110 }
2111
2112 static void mem_commit(MemoryListener *listener)
2113 {
2114 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2115 AddressSpaceDispatch *cur = as->dispatch;
2116 AddressSpaceDispatch *next = as->next_dispatch;
2117
2118 phys_page_compact_all(next, next->map.nodes_nb);
2119
2120 atomic_rcu_set(&as->dispatch, next);
2121 if (cur) {
2122 call_rcu(cur, address_space_dispatch_free, rcu);
2123 }
2124 }
2125
2126 static void tcg_commit(MemoryListener *listener)
2127 {
2128 CPUState *cpu;
2129
2130 /* Since each CPU stores RAM addresses in its TLB cache, we must
2131 reset the modified entries. */
2132 /* XXX: slow! */
2133 CPU_FOREACH(cpu) {
2134 /* FIXME: Disentangle the cpu.h circular files deps so we can
2135 directly get the right CPU from listener. */
2136 if (cpu->tcg_as_listener != listener) {
2137 continue;
2138 }
2139 cpu_reload_memory_map(cpu);
2140 }
2141 }
2142
2143 static void core_log_global_start(MemoryListener *listener)
2144 {
2145 cpu_physical_memory_set_dirty_tracking(true);
2146 }
2147
2148 static void core_log_global_stop(MemoryListener *listener)
2149 {
2150 cpu_physical_memory_set_dirty_tracking(false);
2151 }
2152
2153 static MemoryListener core_memory_listener = {
2154 .log_global_start = core_log_global_start,
2155 .log_global_stop = core_log_global_stop,
2156 .priority = 1,
2157 };
2158
2159 void address_space_init_dispatch(AddressSpace *as)
2160 {
2161 as->dispatch = NULL;
2162 as->dispatch_listener = (MemoryListener) {
2163 .begin = mem_begin,
2164 .commit = mem_commit,
2165 .region_add = mem_add,
2166 .region_nop = mem_add,
2167 .priority = 0,
2168 };
2169 memory_listener_register(&as->dispatch_listener, as);
2170 }
2171
2172 void address_space_unregister(AddressSpace *as)
2173 {
2174 memory_listener_unregister(&as->dispatch_listener);
2175 }
2176
2177 void address_space_destroy_dispatch(AddressSpace *as)
2178 {
2179 AddressSpaceDispatch *d = as->dispatch;
2180
2181 atomic_rcu_set(&as->dispatch, NULL);
2182 if (d) {
2183 call_rcu(d, address_space_dispatch_free, rcu);
2184 }
2185 }
2186
2187 static void memory_map_init(void)
2188 {
2189 system_memory = g_malloc(sizeof(*system_memory));
2190
2191 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2192 address_space_init(&address_space_memory, system_memory, "memory");
2193
2194 system_io = g_malloc(sizeof(*system_io));
2195 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2196 65536);
2197 address_space_init(&address_space_io, system_io, "I/O");
2198
2199 memory_listener_register(&core_memory_listener, &address_space_memory);
2200 }
2201
2202 MemoryRegion *get_system_memory(void)
2203 {
2204 return system_memory;
2205 }
2206
2207 MemoryRegion *get_system_io(void)
2208 {
2209 return system_io;
2210 }
2211
2212 #endif /* !defined(CONFIG_USER_ONLY) */
2213
2214 /* physical memory access (slow version, mainly for debug) */
2215 #if defined(CONFIG_USER_ONLY)
2216 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2217 uint8_t *buf, int len, int is_write)
2218 {
2219 int l, flags;
2220 target_ulong page;
2221 void * p;
2222
2223 while (len > 0) {
2224 page = addr & TARGET_PAGE_MASK;
2225 l = (page + TARGET_PAGE_SIZE) - addr;
2226 if (l > len)
2227 l = len;
2228 flags = page_get_flags(page);
2229 if (!(flags & PAGE_VALID))
2230 return -1;
2231 if (is_write) {
2232 if (!(flags & PAGE_WRITE))
2233 return -1;
2234 /* XXX: this code should not depend on lock_user */
2235 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2236 return -1;
2237 memcpy(p, buf, l);
2238 unlock_user(p, addr, l);
2239 } else {
2240 if (!(flags & PAGE_READ))
2241 return -1;
2242 /* XXX: this code should not depend on lock_user */
2243 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2244 return -1;
2245 memcpy(buf, p, l);
2246 unlock_user(p, addr, 0);
2247 }
2248 len -= l;
2249 buf += l;
2250 addr += l;
2251 }
2252 return 0;
2253 }
2254
2255 #else
2256
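/* After a direct write into guest RAM: if any page in the range was
 * still clean, invalidate translated code derived from it and set the
 * (non-code) dirty bits; Xen is notified unconditionally via
 * xen_modified_memory().
 */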
2257 static void invalidate_and_set_dirty(hwaddr addr,
2258 hwaddr length)
2259 {
2260 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2261 tb_invalidate_phys_range(addr, addr + length, 0);
2262 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2263 }
2264 xen_modified_memory(addr, length);
2265 }
2266
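/* Clamp an access of l bytes at addr to what the region can take in one
 * operation: the region's declared valid.max_access_size (default 4),
 * the natural alignment of addr (addr & -addr isolates the lowest set
 * bit), and finally round down to a power of two.
 *
 * Example: addr = 0x1006, l = 4 -> addr & -addr = 2, so the caller's
 * 4-byte access is carried out as two 2-byte accesses.
 */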
2267 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2268 {
2269 unsigned access_size_max = mr->ops->valid.max_access_size;
2270
2271 /* Regions are assumed to support 1-4 byte accesses unless
2272 otherwise specified. */
2273 if (access_size_max == 0) {
2274 access_size_max = 4;
2275 }
2276
2277 /* Bound the maximum access by the alignment of the address. */
2278 if (!mr->ops->impl.unaligned) {
2279 unsigned align_size_max = addr & -addr;
2280 if (align_size_max != 0 && align_size_max < access_size_max) {
2281 access_size_max = align_size_max;
2282 }
2283 }
2284
2285 /* Don't attempt accesses larger than the maximum. */
2286 if (l > access_size_max) {
2287 l = access_size_max;
2288 }
2289 if (l & (l - 1)) {
2290 l = 1 << (qemu_fls(l) - 1);
2291 }
2292
2293 return l;
2294 }
2295
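/* Copy len bytes between buf and the guest address space, one translated
 * chunk at a time: direct RAM is memcpy'd (with dirty tracking on the
 * write path), anything else goes through io_mem_read()/io_mem_write(),
 * split according to memory_access_size(). Returns true if any MMIO
 * access reported an error.
 */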
2296 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2297 int len, bool is_write)
2298 {
2299 hwaddr l;
2300 uint8_t *ptr;
2301 uint64_t val;
2302 hwaddr addr1;
2303 MemoryRegion *mr;
2304 bool error = false;
2305
2306 while (len > 0) {
2307 l = len;
2308 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2309
2310 if (is_write) {
2311 if (!memory_access_is_direct(mr, is_write)) {
2312 l = memory_access_size(mr, l, addr1);
2313 /* XXX: could force current_cpu to NULL to avoid
2314 potential bugs */
2315 switch (l) {
2316 case 8:
2317 /* 64 bit write access */
2318 val = ldq_p(buf);
2319 error |= io_mem_write(mr, addr1, val, 8);
2320 break;
2321 case 4:
2322 /* 32 bit write access */
2323 val = ldl_p(buf);
2324 error |= io_mem_write(mr, addr1, val, 4);
2325 break;
2326 case 2:
2327 /* 16 bit write access */
2328 val = lduw_p(buf);
2329 error |= io_mem_write(mr, addr1, val, 2);
2330 break;
2331 case 1:
2332 /* 8 bit write access */
2333 val = ldub_p(buf);
2334 error |= io_mem_write(mr, addr1, val, 1);
2335 break;
2336 default:
2337 abort();
2338 }
2339 } else {
2340 addr1 += memory_region_get_ram_addr(mr);
2341 /* RAM case */
2342 ptr = qemu_get_ram_ptr(addr1);
2343 memcpy(ptr, buf, l);
2344 invalidate_and_set_dirty(addr1, l);
2345 }
2346 } else {
2347 if (!memory_access_is_direct(mr, is_write)) {
2348 /* I/O case */
2349 l = memory_access_size(mr, l, addr1);
2350 switch (l) {
2351 case 8:
2352 /* 64 bit read access */
2353 error |= io_mem_read(mr, addr1, &val, 8);
2354 stq_p(buf, val);
2355 break;
2356 case 4:
2357 /* 32 bit read access */
2358 error |= io_mem_read(mr, addr1, &val, 4);
2359 stl_p(buf, val);
2360 break;
2361 case 2:
2362 /* 16 bit read access */
2363 error |= io_mem_read(mr, addr1, &val, 2);
2364 stw_p(buf, val);
2365 break;
2366 case 1:
2367 /* 8 bit read access */
2368 error |= io_mem_read(mr, addr1, &val, 1);
2369 stb_p(buf, val);
2370 break;
2371 default:
2372 abort();
2373 }
2374 } else {
2375 /* RAM case */
2376 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2377 memcpy(buf, ptr, l);
2378 }
2379 }
2380 len -= l;
2381 buf += l;
2382 addr += l;
2383 }
2384
2385 return error;
2386 }
2387
2388 bool address_space_write(AddressSpace *as, hwaddr addr,
2389 const uint8_t *buf, int len)
2390 {
2391 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2392 }
2393
2394 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2395 {
2396 return address_space_rw(as, addr, buf, len, false);
2397 }
2398
2399
2400 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2401 int len, int is_write)
2402 {
2403 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2404 }
2405
2406 enum write_rom_type {
2407 WRITE_DATA,
2408 FLUSH_CACHE,
2409 };
2410
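/* Walk the range like address_space_rw(), but only touch RAM and ROMD
 * regions: WRITE_DATA copies buf into their backing storage regardless
 * of read-only status (ROM loading), while FLUSH_CACHE only flushes the
 * host instruction cache over that storage (used by
 * cpu_flush_icache_range()).
 */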
2411 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2412 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2413 {
2414 hwaddr l;
2415 uint8_t *ptr;
2416 hwaddr addr1;
2417 MemoryRegion *mr;
2418
2419 while (len > 0) {
2420 l = len;
2421 mr = address_space_translate(as, addr, &addr1, &l, true);
2422
2423 if (!(memory_region_is_ram(mr) ||
2424 memory_region_is_romd(mr))) {
2425 /* do nothing */
2426 } else {
2427 addr1 += memory_region_get_ram_addr(mr);
2428 /* ROM/RAM case */
2429 ptr = qemu_get_ram_ptr(addr1);
2430 switch (type) {
2431 case WRITE_DATA:
2432 memcpy(ptr, buf, l);
2433 invalidate_and_set_dirty(addr1, l);
2434 break;
2435 case FLUSH_CACHE:
2436 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2437 break;
2438 }
2439 }
2440 len -= l;
2441 buf += l;
2442 addr += l;
2443 }
2444 }
2445
2446 /* Used for ROM loading: can write to both RAM and ROM. */
2447 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2448 const uint8_t *buf, int len)
2449 {
2450 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2451 }
2452
2453 void cpu_flush_icache_range(hwaddr start, int len)
2454 {
2455 /*
2456 * This function should do the same thing as an icache flush that was
2457 * triggered from within the guest. For TCG we are always cache coherent,
2458 * so there is no need to flush anything. For KVM / Xen we at least need
2459 * to flush the host's instruction cache.
2460 */
2461 if (tcg_enabled()) {
2462 return;
2463 }
2464
2465 cpu_physical_memory_write_rom_internal(&address_space_memory,
2466 start, NULL, len, FLUSH_CACHE);
2467 }
2468
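/* Fallback for address_space_map() when the target is not directly
 * accessible RAM: a single, global bounce buffer. Callers that find it
 * busy can register a QEMUBH with cpu_register_map_client() to be
 * notified (via cpu_notify_map_clients()) once the buffer is released
 * in address_space_unmap().
 */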
2469 typedef struct {
2470 MemoryRegion *mr;
2471 void *buffer;
2472 hwaddr addr;
2473 hwaddr len;
2474 bool in_use;
2475 } BounceBuffer;
2476
2477 static BounceBuffer bounce;
2478
2479 typedef struct MapClient {
2480 QEMUBH *bh;
2481 QLIST_ENTRY(MapClient) link;
2482 } MapClient;
2483
2484 QemuMutex map_client_list_lock;
2485 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2486 = QLIST_HEAD_INITIALIZER(map_client_list);
2487
2488 static void cpu_unregister_map_client_do(MapClient *client)
2489 {
2490 QLIST_REMOVE(client, link);
2491 g_free(client);
2492 }
2493
2494 static void cpu_notify_map_clients_locked(void)
2495 {
2496 MapClient *client;
2497
2498 while (!QLIST_EMPTY(&map_client_list)) {
2499 client = QLIST_FIRST(&map_client_list);
2500 qemu_bh_schedule(client->bh);
2501 cpu_unregister_map_client_do(client);
2502 }
2503 }
2504
2505 void cpu_register_map_client(QEMUBH *bh)
2506 {
2507 MapClient *client = g_malloc(sizeof(*client));
2508
2509 qemu_mutex_lock(&map_client_list_lock);
2510 client->bh = bh;
2511 QLIST_INSERT_HEAD(&map_client_list, client, link);
2512 if (!atomic_read(&bounce.in_use)) {
2513 cpu_notify_map_clients_locked();
2514 }
2515 qemu_mutex_unlock(&map_client_list_lock);
2516 }
2517
2518 void cpu_exec_init_all(void)
2519 {
2520 qemu_mutex_init(&ram_list.mutex);
2521 memory_map_init();
2522 io_mem_init();
2523 qemu_mutex_init(&map_client_list_lock);
2524 }
2525
2526 void cpu_unregister_map_client(QEMUBH *bh)
2527 {
2528 MapClient *client;
2529
2530 qemu_mutex_lock(&map_client_list_lock);
2531 QLIST_FOREACH(client, &map_client_list, link) {
2532 if (client->bh == bh) {
2533 cpu_unregister_map_client_do(client);
2534 break;
2535 }
2536 }
2537 qemu_mutex_unlock(&map_client_list_lock);
2538 }
2539
2540 static void cpu_notify_map_clients(void)
2541 {
2542 qemu_mutex_lock(&map_client_list_lock);
2543 cpu_notify_map_clients_locked();
2544 qemu_mutex_unlock(&map_client_list_lock);
2545 }
2546
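/* Check whether an access of len bytes at addr would be accepted, by
 * walking the same translation as address_space_rw() and asking each
 * non-RAM region whether it accepts an access of the resulting size.
 */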
2547 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2548 {
2549 MemoryRegion *mr;
2550 hwaddr l, xlat;
2551
2552 while (len > 0) {
2553 l = len;
2554 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2555 if (!memory_access_is_direct(mr, is_write)) {
2556 l = memory_access_size(mr, l, addr);
2557 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2558 return false;
2559 }
2560 }
2561
2562 len -= l;
2563 addr += l;
2564 }
2565 return true;
2566 }
2567
2568 /* Map a physical memory region into a host virtual address.
2569 * May map a subset of the requested range, given by and returned in *plen.
2570 * May return NULL if resources needed to perform the mapping are exhausted.
2571 * Use only for reads OR writes - not for read-modify-write operations.
2572 * Use cpu_register_map_client() to know when retrying the map operation is
2573 * likely to succeed.
2574 */
2575 void *address_space_map(AddressSpace *as,
2576 hwaddr addr,
2577 hwaddr *plen,
2578 bool is_write)
2579 {
2580 hwaddr len = *plen;
2581 hwaddr done = 0;
2582 hwaddr l, xlat, base;
2583 MemoryRegion *mr, *this_mr;
2584 ram_addr_t raddr;
2585
2586 if (len == 0) {
2587 return NULL;
2588 }
2589
2590 l = len;
2591 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2592 if (!memory_access_is_direct(mr, is_write)) {
2593 if (atomic_xchg(&bounce.in_use, true)) {
2594 return NULL;
2595 }
2596 /* Avoid unbounded allocations */
2597 l = MIN(l, TARGET_PAGE_SIZE);
2598 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2599 bounce.addr = addr;
2600 bounce.len = l;
2601
2602 memory_region_ref(mr);
2603 bounce.mr = mr;
2604 if (!is_write) {
2605 address_space_read(as, addr, bounce.buffer, l);
2606 }
2607
2608 *plen = l;
2609 return bounce.buffer;
2610 }
2611
2612 base = xlat;
2613 raddr = memory_region_get_ram_addr(mr);
2614
2615 for (;;) {
2616 len -= l;
2617 addr += l;
2618 done += l;
2619 if (len == 0) {
2620 break;
2621 }
2622
2623 l = len;
2624 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2625 if (this_mr != mr || xlat != base + done) {
2626 break;
2627 }
2628 }
2629
2630 memory_region_ref(mr);
2631 *plen = done;
2632 return qemu_ram_ptr_length(raddr + base, plen);
2633 }
2634
2635 /* Unmaps a memory region previously mapped by address_space_map().
2636 * Will also mark the memory as dirty if is_write == 1. access_len gives
2637 * the amount of memory that was actually read or written by the caller.
2638 */
2639 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2640 int is_write, hwaddr access_len)
2641 {
2642 if (buffer != bounce.buffer) {
2643 MemoryRegion *mr;
2644 ram_addr_t addr1;
2645
2646 mr = qemu_ram_addr_from_host(buffer, &addr1);
2647 assert(mr != NULL);
2648 if (is_write) {
2649 invalidate_and_set_dirty(addr1, access_len);
2650 }
2651 if (xen_enabled()) {
2652 xen_invalidate_map_cache_entry(buffer);
2653 }
2654 memory_region_unref(mr);
2655 return;
2656 }
2657 if (is_write) {
2658 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2659 }
2660 qemu_vfree(bounce.buffer);
2661 bounce.buffer = NULL;
2662 memory_region_unref(bounce.mr);
2663 atomic_mb_set(&bounce.in_use, false);
2664 cpu_notify_map_clients();
2665 }
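/* Illustrative sketch (not built): a typical read-only use of the
 * map/unmap pair above. process_buffer(), gpa and the 4096-byte length
 * are hypothetical; note that the mapped length may come back smaller
 * than requested.
 *
 *     hwaddr len = 4096;
 *     void *p = address_space_map(&address_space_memory, gpa, &len, false);
 *     if (p) {
 *         process_buffer(p, len);
 *         address_space_unmap(&address_space_memory, p, len, false, len);
 *     } else {
 *         // out of resources (e.g. the bounce buffer is busy);
 *         // cpu_register_map_client() reports when a retry may succeed
 *     }
 */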
2666
2667 void *cpu_physical_memory_map(hwaddr addr,
2668 hwaddr *plen,
2669 int is_write)
2670 {
2671 return address_space_map(&address_space_memory, addr, plen, is_write);
2672 }
2673
2674 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2675 int is_write, hwaddr access_len)
2676 {
2677 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2678 }
2679
2680 /* warning: addr must be aligned */
2681 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2682 enum device_endian endian)
2683 {
2684 uint8_t *ptr;
2685 uint64_t val;
2686 MemoryRegion *mr;
2687 hwaddr l = 4;
2688 hwaddr addr1;
2689
2690 mr = address_space_translate(as, addr, &addr1, &l, false);
2691 if (l < 4 || !memory_access_is_direct(mr, false)) {
2692 /* I/O case */
2693 io_mem_read(mr, addr1, &val, 4);
2694 #if defined(TARGET_WORDS_BIGENDIAN)
2695 if (endian == DEVICE_LITTLE_ENDIAN) {
2696 val = bswap32(val);
2697 }
2698 #else
2699 if (endian == DEVICE_BIG_ENDIAN) {
2700 val = bswap32(val);
2701 }
2702 #endif
2703 } else {
2704 /* RAM case */
2705 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2706 & TARGET_PAGE_MASK)
2707 + addr1);
2708 switch (endian) {
2709 case DEVICE_LITTLE_ENDIAN:
2710 val = ldl_le_p(ptr);
2711 break;
2712 case DEVICE_BIG_ENDIAN:
2713 val = ldl_be_p(ptr);
2714 break;
2715 default:
2716 val = ldl_p(ptr);
2717 break;
2718 }
2719 }
2720 return val;
2721 }
2722
2723 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2724 {
2725 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2726 }
2727
2728 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2729 {
2730 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2731 }
2732
2733 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2734 {
2735 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2736 }
2737
2738 /* warning: addr must be aligned */
2739 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2740 enum device_endian endian)
2741 {
2742 uint8_t *ptr;
2743 uint64_t val;
2744 MemoryRegion *mr;
2745 hwaddr l = 8;
2746 hwaddr addr1;
2747
2748 mr = address_space_translate(as, addr, &addr1, &l,
2749 false);
2750 if (l < 8 || !memory_access_is_direct(mr, false)) {
2751 /* I/O case */
2752 io_mem_read(mr, addr1, &val, 8);
2753 #if defined(TARGET_WORDS_BIGENDIAN)
2754 if (endian == DEVICE_LITTLE_ENDIAN) {
2755 val = bswap64(val);
2756 }
2757 #else
2758 if (endian == DEVICE_BIG_ENDIAN) {
2759 val = bswap64(val);
2760 }
2761 #endif
2762 } else {
2763 /* RAM case */
2764 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2765 & TARGET_PAGE_MASK)
2766 + addr1);
2767 switch (endian) {
2768 case DEVICE_LITTLE_ENDIAN:
2769 val = ldq_le_p(ptr);
2770 break;
2771 case DEVICE_BIG_ENDIAN:
2772 val = ldq_be_p(ptr);
2773 break;
2774 default:
2775 val = ldq_p(ptr);
2776 break;
2777 }
2778 }
2779 return val;
2780 }
2781
2782 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2783 {
2784 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2785 }
2786
2787 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2788 {
2789 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2790 }
2791
2792 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2793 {
2794 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2795 }
2796
2797 /* XXX: optimize */
2798 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2799 {
2800 uint8_t val;
2801 address_space_rw(as, addr, &val, 1, 0);
2802 return val;
2803 }
2804
2805 /* warning: addr must be aligned */
2806 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2807 enum device_endian endian)
2808 {
2809 uint8_t *ptr;
2810 uint64_t val;
2811 MemoryRegion *mr;
2812 hwaddr l = 2;
2813 hwaddr addr1;
2814
2815 mr = address_space_translate(as, addr, &addr1, &l,
2816 false);
2817 if (l < 2 || !memory_access_is_direct(mr, false)) {
2818 /* I/O case */
2819 io_mem_read(mr, addr1, &val, 2);
2820 #if defined(TARGET_WORDS_BIGENDIAN)
2821 if (endian == DEVICE_LITTLE_ENDIAN) {
2822 val = bswap16(val);
2823 }
2824 #else
2825 if (endian == DEVICE_BIG_ENDIAN) {
2826 val = bswap16(val);
2827 }
2828 #endif
2829 } else {
2830 /* RAM case */
2831 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2832 & TARGET_PAGE_MASK)
2833 + addr1);
2834 switch (endian) {
2835 case DEVICE_LITTLE_ENDIAN:
2836 val = lduw_le_p(ptr);
2837 break;
2838 case DEVICE_BIG_ENDIAN:
2839 val = lduw_be_p(ptr);
2840 break;
2841 default:
2842 val = lduw_p(ptr);
2843 break;
2844 }
2845 }
2846 return val;
2847 }
2848
2849 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2850 {
2851 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2852 }
2853
2854 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2855 {
2856 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2857 }
2858
2859 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2860 {
2861 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2862 }
2863
2864 /* warning: addr must be aligned. The RAM page is not marked as dirty
2865 and the code inside is not invalidated. It is useful if the dirty
2866 bits are used to track modified PTEs. */
2867 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2868 {
2869 uint8_t *ptr;
2870 MemoryRegion *mr;
2871 hwaddr l = 4;
2872 hwaddr addr1;
2873
2874 mr = address_space_translate(as, addr, &addr1, &l,
2875 true);
2876 if (l < 4 || !memory_access_is_direct(mr, true)) {
2877 io_mem_write(mr, addr1, val, 4);
2878 } else {
2879 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2880 ptr = qemu_get_ram_ptr(addr1);
2881 stl_p(ptr, val);
2882
2883 if (unlikely(in_migration)) {
2884 if (cpu_physical_memory_is_clean(addr1)) {
2885 /* invalidate code */
2886 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2887 /* set dirty bit */
2888 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2889 }
2890 }
2891 }
2892 }
2893
2894 /* warning: addr must be aligned */
2895 static inline void stl_phys_internal(AddressSpace *as,
2896 hwaddr addr, uint32_t val,
2897 enum device_endian endian)
2898 {
2899 uint8_t *ptr;
2900 MemoryRegion *mr;
2901 hwaddr l = 4;
2902 hwaddr addr1;
2903
2904 mr = address_space_translate(as, addr, &addr1, &l,
2905 true);
2906 if (l < 4 || !memory_access_is_direct(mr, true)) {
2907 #if defined(TARGET_WORDS_BIGENDIAN)
2908 if (endian == DEVICE_LITTLE_ENDIAN) {
2909 val = bswap32(val);
2910 }
2911 #else
2912 if (endian == DEVICE_BIG_ENDIAN) {
2913 val = bswap32(val);
2914 }
2915 #endif
2916 io_mem_write(mr, addr1, val, 4);
2917 } else {
2918 /* RAM case */
2919 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2920 ptr = qemu_get_ram_ptr(addr1);
2921 switch (endian) {
2922 case DEVICE_LITTLE_ENDIAN:
2923 stl_le_p(ptr, val);
2924 break;
2925 case DEVICE_BIG_ENDIAN:
2926 stl_be_p(ptr, val);
2927 break;
2928 default:
2929 stl_p(ptr, val);
2930 break;
2931 }
2932 invalidate_and_set_dirty(addr1, 4);
2933 }
2934 }
2935
2936 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2937 {
2938 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2939 }
2940
2941 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2942 {
2943 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2944 }
2945
2946 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2947 {
2948 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2949 }
2950
2951 /* XXX: optimize */
2952 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2953 {
2954 uint8_t v = val;
2955 address_space_rw(as, addr, &v, 1, 1);
2956 }
2957
2958 /* warning: addr must be aligned */
2959 static inline void stw_phys_internal(AddressSpace *as,
2960 hwaddr addr, uint32_t val,
2961 enum device_endian endian)
2962 {
2963 uint8_t *ptr;
2964 MemoryRegion *mr;
2965 hwaddr l = 2;
2966 hwaddr addr1;
2967
2968 mr = address_space_translate(as, addr, &addr1, &l, true);
2969 if (l < 2 || !memory_access_is_direct(mr, true)) {
2970 #if defined(TARGET_WORDS_BIGENDIAN)
2971 if (endian == DEVICE_LITTLE_ENDIAN) {
2972 val = bswap16(val);
2973 }
2974 #else
2975 if (endian == DEVICE_BIG_ENDIAN) {
2976 val = bswap16(val);
2977 }
2978 #endif
2979 io_mem_write(mr, addr1, val, 2);
2980 } else {
2981 /* RAM case */
2982 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2983 ptr = qemu_get_ram_ptr(addr1);
2984 switch (endian) {
2985 case DEVICE_LITTLE_ENDIAN:
2986 stw_le_p(ptr, val);
2987 break;
2988 case DEVICE_BIG_ENDIAN:
2989 stw_be_p(ptr, val);
2990 break;
2991 default:
2992 stw_p(ptr, val);
2993 break;
2994 }
2995 invalidate_and_set_dirty(addr1, 2);
2996 }
2997 }
2998
2999 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3000 {
3001 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
3002 }
3003
3004 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3005 {
3006 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
3007 }
3008
3009 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3010 {
3011 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
3012 }
3013
3014 /* XXX: optimize */
3015 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3016 {
3017 val = tswap64(val);
3018 address_space_rw(as, addr, (void *) &val, 8, 1);
3019 }
3020
3021 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3022 {
3023 val = cpu_to_le64(val);
3024 address_space_rw(as, addr, (void *) &val, 8, 1);
3025 }
3026
3027 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3028 {
3029 val = cpu_to_be64(val);
3030 address_space_rw(as, addr, (void *) &val, 8, 1);
3031 }
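/* Illustrative sketch (not built): the _le/_be/native load and store
 * helpers above perform single, naturally aligned accesses with an
 * explicit endianness. The guest physical address 0x1000 below is
 * hypothetical and assumed to be plain RAM:
 *
 *     stl_le_phys(&address_space_memory, 0x1000, 0x12345678);
 *     uint32_t v = ldl_le_phys(&address_space_memory, 0x1000);  // 0x12345678
 */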
3032
3033 /* virtual memory access for debug (includes writing to ROM) */
3034 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3035 uint8_t *buf, int len, int is_write)
3036 {
3037 int l;
3038 hwaddr phys_addr;
3039 target_ulong page;
3040
3041 while (len > 0) {
3042 page = addr & TARGET_PAGE_MASK;
3043 phys_addr = cpu_get_phys_page_debug(cpu, page);
3044 /* if no physical page mapped, return an error */
3045 if (phys_addr == -1)
3046 return -1;
3047 l = (page + TARGET_PAGE_SIZE) - addr;
3048 if (l > len)
3049 l = len;
3050 phys_addr += (addr & ~TARGET_PAGE_MASK);
3051 if (is_write) {
3052 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3053 } else {
3054 address_space_rw(cpu->as, phys_addr, buf, l, 0);
3055 }
3056 len -= l;
3057 buf += l;
3058 addr += l;
3059 }
3060 return 0;
3061 }
3062 #endif
3063
3064 /*
3065 * A helper function for the _utterly broken_ virtio device model to find out
3066 * if it's running on a big-endian machine. Don't do this at home, kids!
3067 */
3068 bool target_words_bigendian(void);
3069 bool target_words_bigendian(void)
3070 {
3071 #if defined(TARGET_WORDS_BIGENDIAN)
3072 return true;
3073 #else
3074 return false;
3075 #endif
3076 }
3077
3078 #ifndef CONFIG_USER_ONLY
3079 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3080 {
3081 MemoryRegion *mr;
3082 hwaddr l = 1;
3083
3084 mr = address_space_translate(&address_space_memory,
3085 phys_addr, &phys_addr, &l, false);
3086
3087 return !(memory_region_is_ram(mr) ||
3088 memory_region_is_romd(mr));
3089 }
3090
3091 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3092 {
3093 RAMBlock *block;
3094
3095 rcu_read_lock();
3096 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3097 func(block->host, block->offset, block->used_length, opaque);
3098 }
3099 rcu_read_unlock();
3100 }
3101 #endif