/*
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <sys/types.h>

#include "qemu-common.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#endif
#include "exec/cpu-all.h"

#include "qemu/rcu_queue.h"
#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
static bool in_migration;
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
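
/*
 * Illustrative sketch (not part of the original source): a reader that only
 * walks ram_list.blocks needs an RCU read-side critical section, e.g.
 *
 *     rcu_read_lock();
 *     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 *         ... read-only use of block ...
 *     }
 *     rcu_read_unlock();
 *
 * whereas code that inserts or removes blocks must additionally hold the
 * ramlist mutex via qemu_mutex_lock_ramlist()/qemu_mutex_unlock_ramlist(),
 * as the functions later in this file do.
 */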
static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;
/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)
#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)

typedef PhysPageEntry Node[P_L2_SIZE];
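
/*
 * Worked example (illustrative, not part of the original source): assuming a
 * 12-bit TARGET_PAGE_BITS and a 9-bit P_L2_BITS (both assumptions here),
 * P_L2_LEVELS evaluates to ((64 - 12 - 1) / 9) + 1 = 6, i.e. the physical
 * map is a six-level radix tree whose nodes each hold P_L2_SIZE = 512
 * PhysPageEntry slots, and phys_page_find() below consumes 9 bits of the
 * page number per level as it walks down.
 */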
typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)

typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;
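
/*
 * Illustrative note (not part of the original source): SUBPAGE_IDX() keeps
 * only the offset of an address within its target page, so with an assumed
 * 4 KiB page (TARGET_PAGE_MASK == ~0xfff) SUBPAGE_IDX(0x12345678) == 0x678;
 * that offset indexes sub_section[] to find the section owning each byte of
 * a page that is shared by several memory regions.
 */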
#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#endif
#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}
static uint32_t phys_map_node_alloc(PhysPageMap *map)
{
    unsigned i;
    uint32_t ret;

    ret = map->nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);
    for (i = 0; i < P_L2_SIZE; ++i) {
        map->nodes[ret][i].skip = 1;
        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = map->nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}
static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}
bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}
/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}
/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    diff = int128_sub(section->mr->size, int128_make64(addr));
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    return section;
}
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}
/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif
430 #if !defined(CONFIG_USER_ONLY)
432 static int cpu_common_post_load(void *opaque
, int version_id
)
434 CPUState
*cpu
= opaque
;
436 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
437 version_id is increased. */
438 cpu
->interrupt_request
&= ~0x01;
444 static int cpu_common_pre_load(void *opaque
)
446 CPUState
*cpu
= opaque
;
448 cpu
->exception_index
= -1;
453 static bool cpu_common_exception_index_needed(void *opaque
)
455 CPUState
*cpu
= opaque
;
457 return tcg_enabled() && cpu
->exception_index
!= -1;
460 static const VMStateDescription vmstate_cpu_common_exception_index
= {
461 .name
= "cpu_common/exception_index",
463 .minimum_version_id
= 1,
464 .fields
= (VMStateField
[]) {
465 VMSTATE_INT32(exception_index
, CPUState
),
466 VMSTATE_END_OF_LIST()
470 const VMStateDescription vmstate_cpu_common
= {
471 .name
= "cpu_common",
473 .minimum_version_id
= 1,
474 .pre_load
= cpu_common_pre_load
,
475 .post_load
= cpu_common_post_load
,
476 .fields
= (VMStateField
[]) {
477 VMSTATE_UINT32(halted
, CPUState
),
478 VMSTATE_UINT32(interrupt_request
, CPUState
),
479 VMSTATE_END_OF_LIST()
481 .subsections
= (VMStateSubsection
[]) {
483 .vmsd
= &vmstate_cpu_common_exception_index
,
484 .needed
= cpu_common_exception_index_needed
,
493 CPUState
*qemu_get_cpu(int index
)
498 if (cpu
->cpu_index
== index
) {
506 #if !defined(CONFIG_USER_ONLY)
507 void tcg_cpu_address_space_init(CPUState
*cpu
, AddressSpace
*as
)
509 /* We only support one address space per cpu at the moment. */
510 assert(cpu
->as
== as
);
512 if (cpu
->tcg_as_listener
) {
513 memory_listener_unregister(cpu
->tcg_as_listener
);
515 cpu
->tcg_as_listener
= g_new0(MemoryListener
, 1);
517 cpu
->tcg_as_listener
->commit
= tcg_commit
;
518 memory_listener_register(cpu
->tcg_as_listener
, as
);
522 void cpu_exec_init(CPUArchState
*env
)
524 CPUState
*cpu
= ENV_GET_CPU(env
);
525 CPUClass
*cc
= CPU_GET_CLASS(cpu
);
529 #if defined(CONFIG_USER_ONLY)
533 CPU_FOREACH(some_cpu
) {
536 cpu
->cpu_index
= cpu_index
;
538 QTAILQ_INIT(&cpu
->breakpoints
);
539 QTAILQ_INIT(&cpu
->watchpoints
);
540 #ifndef CONFIG_USER_ONLY
541 cpu
->as
= &address_space_memory
;
542 cpu
->thread_id
= qemu_get_thread_id();
543 cpu_reload_memory_map(cpu
);
545 QTAILQ_INSERT_TAIL(&cpus
, cpu
, node
);
546 #if defined(CONFIG_USER_ONLY)
549 if (qdev_get_vmsd(DEVICE(cpu
)) == NULL
) {
550 vmstate_register(NULL
, cpu_index
, &vmstate_cpu_common
, cpu
);
552 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
553 register_savevm(NULL
, "cpu", cpu_index
, CPU_SAVE_VERSION
,
554 cpu_save
, cpu_load
, env
);
555 assert(cc
->vmsd
== NULL
);
556 assert(qdev_get_vmsd(DEVICE(cpu
)) == NULL
);
558 if (cc
->vmsd
!= NULL
) {
559 vmstate_register(NULL
, cpu_index
, cc
->vmsd
, cpu
);
563 #if defined(CONFIG_USER_ONLY)
564 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
566 tb_invalidate_phys_page_range(pc
, pc
+ 1, 0);
569 static void breakpoint_invalidate(CPUState
*cpu
, target_ulong pc
)
571 hwaddr phys
= cpu_get_phys_page_debug(cpu
, pc
);
573 tb_invalidate_phys_addr(cpu
->as
,
574 phys
| (pc
& ~TARGET_PAGE_MASK
));
579 #if defined(CONFIG_USER_ONLY)
580 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
585 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
591 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
595 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
596 int flags
, CPUWatchpoint
**watchpoint
)
601 /* Add a watchpoint. */
602 int cpu_watchpoint_insert(CPUState
*cpu
, vaddr addr
, vaddr len
,
603 int flags
, CPUWatchpoint
**watchpoint
)
607 /* forbid ranges which are empty or run off the end of the address space */
608 if (len
== 0 || (addr
+ len
- 1) < addr
) {
609 error_report("tried to set invalid watchpoint at %"
610 VADDR_PRIx
", len=%" VADDR_PRIu
, addr
, len
);
613 wp
= g_malloc(sizeof(*wp
));
619 /* keep all GDB-injected watchpoints in front */
620 if (flags
& BP_GDB
) {
621 QTAILQ_INSERT_HEAD(&cpu
->watchpoints
, wp
, entry
);
623 QTAILQ_INSERT_TAIL(&cpu
->watchpoints
, wp
, entry
);
626 tlb_flush_page(cpu
, addr
);
633 /* Remove a specific watchpoint. */
634 int cpu_watchpoint_remove(CPUState
*cpu
, vaddr addr
, vaddr len
,
639 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
640 if (addr
== wp
->vaddr
&& len
== wp
->len
641 && flags
== (wp
->flags
& ~BP_WATCHPOINT_HIT
)) {
642 cpu_watchpoint_remove_by_ref(cpu
, wp
);
649 /* Remove a specific watchpoint by reference. */
650 void cpu_watchpoint_remove_by_ref(CPUState
*cpu
, CPUWatchpoint
*watchpoint
)
652 QTAILQ_REMOVE(&cpu
->watchpoints
, watchpoint
, entry
);
654 tlb_flush_page(cpu
, watchpoint
->vaddr
);
659 /* Remove all matching watchpoints. */
660 void cpu_watchpoint_remove_all(CPUState
*cpu
, int mask
)
662 CPUWatchpoint
*wp
, *next
;
664 QTAILQ_FOREACH_SAFE(wp
, &cpu
->watchpoints
, entry
, next
) {
665 if (wp
->flags
& mask
) {
666 cpu_watchpoint_remove_by_ref(cpu
, wp
);
/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}
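
/*
 * Worked example (illustrative, not part of the original source): a
 * watchpoint at vaddr 0x1000 with len 4 covers bytes 0x1000..0x1003, so an
 * access at addr 0x1002 with len 8 (bytes 0x1002..0x1009) matches, while an
 * access starting at 0x1004 does not.  Comparing inclusive end addresses
 * also keeps a range that ends at the very top of the address space from
 * wrapping to zero and being missed.
 */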
693 /* Add a breakpoint. */
694 int cpu_breakpoint_insert(CPUState
*cpu
, vaddr pc
, int flags
,
695 CPUBreakpoint
**breakpoint
)
699 bp
= g_malloc(sizeof(*bp
));
704 /* keep all GDB-injected breakpoints in front */
705 if (flags
& BP_GDB
) {
706 QTAILQ_INSERT_HEAD(&cpu
->breakpoints
, bp
, entry
);
708 QTAILQ_INSERT_TAIL(&cpu
->breakpoints
, bp
, entry
);
711 breakpoint_invalidate(cpu
, pc
);
719 /* Remove a specific breakpoint. */
720 int cpu_breakpoint_remove(CPUState
*cpu
, vaddr pc
, int flags
)
724 QTAILQ_FOREACH(bp
, &cpu
->breakpoints
, entry
) {
725 if (bp
->pc
== pc
&& bp
->flags
== flags
) {
726 cpu_breakpoint_remove_by_ref(cpu
, bp
);
733 /* Remove a specific breakpoint by reference. */
734 void cpu_breakpoint_remove_by_ref(CPUState
*cpu
, CPUBreakpoint
*breakpoint
)
736 QTAILQ_REMOVE(&cpu
->breakpoints
, breakpoint
, entry
);
738 breakpoint_invalidate(cpu
, breakpoint
->pc
);
743 /* Remove all matching breakpoints. */
744 void cpu_breakpoint_remove_all(CPUState
*cpu
, int mask
)
746 CPUBreakpoint
*bp
, *next
;
748 QTAILQ_FOREACH_SAFE(bp
, &cpu
->breakpoints
, entry
, next
) {
749 if (bp
->flags
& mask
) {
750 cpu_breakpoint_remove_by_ref(cpu
, bp
);
755 /* enable or disable single step mode. EXCP_DEBUG is returned by the
756 CPU loop after each instruction */
757 void cpu_single_step(CPUState
*cpu
, int enabled
)
759 if (cpu
->singlestep_enabled
!= enabled
) {
760 cpu
->singlestep_enabled
= enabled
;
762 kvm_update_guest_debug(cpu
, 0);
764 /* must flush all the translated code to avoid inconsistencies */
765 /* XXX: only flush what is necessary */
766 CPUArchState
*env
= cpu
->env_ptr
;
772 void cpu_abort(CPUState
*cpu
, const char *fmt
, ...)
779 fprintf(stderr
, "qemu: fatal: ");
780 vfprintf(stderr
, fmt
, ap
);
781 fprintf(stderr
, "\n");
782 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
783 if (qemu_log_enabled()) {
784 qemu_log("qemu: fatal: ");
785 qemu_log_vprintf(fmt
, ap2
);
787 log_cpu_state(cpu
, CPU_DUMP_FPU
| CPU_DUMP_CCOP
);
793 #if defined(CONFIG_USER_ONLY)
795 struct sigaction act
;
796 sigfillset(&act
.sa_mask
);
797 act
.sa_handler
= SIG_DFL
;
798 sigaction(SIGABRT
, &act
, NULL
);
804 #if !defined(CONFIG_USER_ONLY)
805 /* Called from RCU critical section */
806 static RAMBlock
*qemu_get_ram_block(ram_addr_t addr
)
810 block
= atomic_rcu_read(&ram_list
.mru_block
);
811 if (block
&& addr
- block
->offset
< block
->max_length
) {
814 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
815 if (addr
- block
->offset
< block
->max_length
) {
820 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
824 /* It is safe to write mru_block outside the iothread lock. This
829 * xxx removed from list
833 * call_rcu(reclaim_ramblock, xxx);
836 * atomic_rcu_set is not needed here. The block was already published
837 * when it was placed into the list. Here we're just making an extra
838 * copy of the pointer.
840 ram_list
.mru_block
= block
;
844 static void tlb_reset_dirty_range_all(ram_addr_t start
, ram_addr_t length
)
850 end
= TARGET_PAGE_ALIGN(start
+ length
);
851 start
&= TARGET_PAGE_MASK
;
854 block
= qemu_get_ram_block(start
);
855 assert(block
== qemu_get_ram_block(end
- 1));
856 start1
= (uintptr_t)ramblock_ptr(block
, start
- block
->offset
);
857 cpu_tlb_reset_dirty_all(start1
, length
);
861 /* Note: start and end must be within the same ram block. */
862 void cpu_physical_memory_reset_dirty(ram_addr_t start
, ram_addr_t length
,
867 cpu_physical_memory_clear_dirty_range_type(start
, length
, client
);
870 tlb_reset_dirty_range_all(start
, length
);
874 static void cpu_physical_memory_set_dirty_tracking(bool enable
)
876 in_migration
= enable
;
879 /* Called from RCU critical section */
880 hwaddr
memory_region_section_get_iotlb(CPUState
*cpu
,
881 MemoryRegionSection
*section
,
883 hwaddr paddr
, hwaddr xlat
,
885 target_ulong
*address
)
890 if (memory_region_is_ram(section
->mr
)) {
892 iotlb
= (memory_region_get_ram_addr(section
->mr
) & TARGET_PAGE_MASK
)
894 if (!section
->readonly
) {
895 iotlb
|= PHYS_SECTION_NOTDIRTY
;
897 iotlb
|= PHYS_SECTION_ROM
;
900 iotlb
= section
- section
->address_space
->dispatch
->map
.sections
;
904 /* Make accesses to pages with watchpoints go via the
905 watchpoint trap routines. */
906 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
907 if (cpu_watchpoint_address_matches(wp
, vaddr
, TARGET_PAGE_SIZE
)) {
908 /* Avoid trapping reads of pages with a write breakpoint. */
909 if ((prot
& PAGE_WRITE
) || (wp
->flags
& BP_MEM_READ
)) {
910 iotlb
= PHYS_SECTION_WATCH
+ paddr
;
911 *address
|= TLB_MMIO
;
919 #endif /* defined(CONFIG_USER_ONLY) */
921 #if !defined(CONFIG_USER_ONLY)
923 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
925 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
);
static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                                   qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}
static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}
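
/*
 * Illustrative note (not part of the original source): because a section
 * index is always smaller than TARGET_PAGE_SIZE, memory_region_section_get_iotlb()
 * earlier in this file can build an iotlb value as
 * (ram_addr & TARGET_PAGE_MASK) | index: the index lives entirely in the
 * low, sub-page bits and the page-aligned address in the high bits, so the
 * two never overlap and the index can be recovered later as
 * iotlb & ~TARGET_PAGE_MASK (as iotlb_to_region() does below).
 */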
959 static void phys_section_destroy(MemoryRegion
*mr
)
961 memory_region_unref(mr
);
964 subpage_t
*subpage
= container_of(mr
, subpage_t
, iomem
);
965 object_unref(OBJECT(&subpage
->iomem
));
970 static void phys_sections_free(PhysPageMap
*map
)
972 while (map
->sections_nb
> 0) {
973 MemoryRegionSection
*section
= &map
->sections
[--map
->sections_nb
];
974 phys_section_destroy(section
->mr
);
976 g_free(map
->sections
);
980 static void register_subpage(AddressSpaceDispatch
*d
, MemoryRegionSection
*section
)
983 hwaddr base
= section
->offset_within_address_space
985 MemoryRegionSection
*existing
= phys_page_find(d
->phys_map
, base
,
986 d
->map
.nodes
, d
->map
.sections
);
987 MemoryRegionSection subsection
= {
988 .offset_within_address_space
= base
,
989 .size
= int128_make64(TARGET_PAGE_SIZE
),
993 assert(existing
->mr
->subpage
|| existing
->mr
== &io_mem_unassigned
);
995 if (!(existing
->mr
->subpage
)) {
996 subpage
= subpage_init(d
->as
, base
);
997 subsection
.address_space
= d
->as
;
998 subsection
.mr
= &subpage
->iomem
;
999 phys_page_set(d
, base
>> TARGET_PAGE_BITS
, 1,
1000 phys_section_add(&d
->map
, &subsection
));
1002 subpage
= container_of(existing
->mr
, subpage_t
, iomem
);
1004 start
= section
->offset_within_address_space
& ~TARGET_PAGE_MASK
;
1005 end
= start
+ int128_get64(section
->size
) - 1;
1006 subpage_register(subpage
, start
, end
,
1007 phys_section_add(&d
->map
, section
));
1011 static void register_multipage(AddressSpaceDispatch
*d
,
1012 MemoryRegionSection
*section
)
1014 hwaddr start_addr
= section
->offset_within_address_space
;
1015 uint16_t section_index
= phys_section_add(&d
->map
, section
);
1016 uint64_t num_pages
= int128_get64(int128_rshift(section
->size
,
1020 phys_page_set(d
, start_addr
>> TARGET_PAGE_BITS
, num_pages
, section_index
);
1023 static void mem_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1025 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
1026 AddressSpaceDispatch
*d
= as
->next_dispatch
;
1027 MemoryRegionSection now
= *section
, remain
= *section
;
1028 Int128 page_size
= int128_make64(TARGET_PAGE_SIZE
);
1030 if (now
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1031 uint64_t left
= TARGET_PAGE_ALIGN(now
.offset_within_address_space
)
1032 - now
.offset_within_address_space
;
1034 now
.size
= int128_min(int128_make64(left
), now
.size
);
1035 register_subpage(d
, &now
);
1037 now
.size
= int128_zero();
1039 while (int128_ne(remain
.size
, now
.size
)) {
1040 remain
.size
= int128_sub(remain
.size
, now
.size
);
1041 remain
.offset_within_address_space
+= int128_get64(now
.size
);
1042 remain
.offset_within_region
+= int128_get64(now
.size
);
1044 if (int128_lt(remain
.size
, page_size
)) {
1045 register_subpage(d
, &now
);
1046 } else if (remain
.offset_within_address_space
& ~TARGET_PAGE_MASK
) {
1047 now
.size
= page_size
;
1048 register_subpage(d
, &now
);
1050 now
.size
= int128_and(now
.size
, int128_neg(page_size
));
1051 register_multipage(d
, &now
);
1056 void qemu_flush_coalesced_mmio_buffer(void)
1059 kvm_flush_coalesced_mmio_buffer();
1062 void qemu_mutex_lock_ramlist(void)
1064 qemu_mutex_lock(&ram_list
.mutex
);
1067 void qemu_mutex_unlock_ramlist(void)
1069 qemu_mutex_unlock(&ram_list
.mutex
);
1074 #include <sys/vfs.h>
1076 #define HUGETLBFS_MAGIC 0x958458f6
1078 static long gethugepagesize(const char *path
, Error
**errp
)
1084 ret
= statfs(path
, &fs
);
1085 } while (ret
!= 0 && errno
== EINTR
);
1088 error_setg_errno(errp
, errno
, "failed to get page size of file %s",
1093 if (fs
.f_type
!= HUGETLBFS_MAGIC
)
1094 fprintf(stderr
, "Warning: path not on HugeTLBFS: %s\n", path
);
1099 static void *file_ram_alloc(RAMBlock
*block
,
1105 char *sanitized_name
;
1110 Error
*local_err
= NULL
;
1112 hpagesize
= gethugepagesize(path
, &local_err
);
1114 error_propagate(errp
, local_err
);
1117 block
->mr
->align
= hpagesize
;
1119 if (memory
< hpagesize
) {
1120 error_setg(errp
, "memory size 0x" RAM_ADDR_FMT
" must be equal to "
1121 "or larger than huge page size 0x%" PRIx64
,
1126 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1128 "host lacks kvm mmu notifiers, -mem-path unsupported");
1132 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1133 sanitized_name
= g_strdup(memory_region_name(block
->mr
));
1134 for (c
= sanitized_name
; *c
!= '\0'; c
++) {
1139 filename
= g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path
,
1141 g_free(sanitized_name
);
1143 fd
= mkstemp(filename
);
1145 error_setg_errno(errp
, errno
,
1146 "unable to create backing store for hugepages");
    memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
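    /*
     * Worked example (illustrative, not part of the original source): with a
     * 2 MiB huge page (hpagesize = 0x200000) a request of 3 MiB (0x300000)
     * becomes (0x300000 + 0x1fffff) & ~0x1fffff = 0x400000, i.e. the size is
     * rounded up to the next multiple of the huge page size before the
     * backing file is truncated and mmap-ed.
     */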
1156 * ftruncate is not supported by hugetlbfs in older
1157 * hosts, so don't bother bailing out on errors.
1158 * If anything goes wrong with it under other filesystems,
1161 if (ftruncate(fd
, memory
)) {
1162 perror("ftruncate");
1165 area
= mmap(0, memory
, PROT_READ
| PROT_WRITE
,
1166 (block
->flags
& RAM_SHARED ? MAP_SHARED
: MAP_PRIVATE
),
1168 if (area
== MAP_FAILED
) {
1169 error_setg_errno(errp
, errno
,
1170 "unable to map backing store for hugepages");
1176 os_mem_prealloc(fd
, area
, memory
);
1184 error_report("%s", error_get_pretty(*errp
));
1191 /* Called with the ramlist lock held. */
1192 static ram_addr_t
find_ram_offset(ram_addr_t size
)
1194 RAMBlock
*block
, *next_block
;
1195 ram_addr_t offset
= RAM_ADDR_MAX
, mingap
= RAM_ADDR_MAX
;
1197 assert(size
!= 0); /* it would hand out same offset multiple times */
1199 if (QLIST_EMPTY_RCU(&ram_list
.blocks
)) {
1203 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1204 ram_addr_t end
, next
= RAM_ADDR_MAX
;
1206 end
= block
->offset
+ block
->max_length
;
1208 QLIST_FOREACH_RCU(next_block
, &ram_list
.blocks
, next
) {
1209 if (next_block
->offset
>= end
) {
1210 next
= MIN(next
, next_block
->offset
);
1213 if (next
- end
>= size
&& next
- end
< mingap
) {
1215 mingap
= next
- end
;
1219 if (offset
== RAM_ADDR_MAX
) {
1220 fprintf(stderr
, "Failed to find gap of requested size: %" PRIu64
"\n",
1228 ram_addr_t
last_ram_offset(void)
1231 ram_addr_t last
= 0;
1234 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1235 last
= MAX(last
, block
->offset
+ block
->max_length
);
1241 static void qemu_ram_setup_dump(void *addr
, ram_addr_t size
)
1245 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1246 if (!machine_dump_guest_core(current_machine
)) {
1247 ret
= qemu_madvise(addr
, size
, QEMU_MADV_DONTDUMP
);
1249 perror("qemu_madvise");
1250 fprintf(stderr
, "madvise doesn't support MADV_DONTDUMP, "
1251 "but dump_guest_core=off specified\n");
1256 /* Called within an RCU critical section, or while the ramlist lock
1259 static RAMBlock
*find_ram_block(ram_addr_t addr
)
1263 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1264 if (block
->offset
== addr
) {
1272 /* Called with iothread lock held. */
1273 void qemu_ram_set_idstr(ram_addr_t addr
, const char *name
, DeviceState
*dev
)
1275 RAMBlock
*new_block
, *block
;
1278 new_block
= find_ram_block(addr
);
1280 assert(!new_block
->idstr
[0]);
1283 char *id
= qdev_get_dev_path(dev
);
1285 snprintf(new_block
->idstr
, sizeof(new_block
->idstr
), "%s/", id
);
1289 pstrcat(new_block
->idstr
, sizeof(new_block
->idstr
), name
);
1291 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1292 if (block
!= new_block
&& !strcmp(block
->idstr
, new_block
->idstr
)) {
1293 fprintf(stderr
, "RAMBlock \"%s\" already registered, abort!\n",
1301 /* Called with iothread lock held. */
1302 void qemu_ram_unset_idstr(ram_addr_t addr
)
1306 /* FIXME: arch_init.c assumes that this is not called throughout
1307 * migration. Ignore the problem since hot-unplug during migration
1308 * does not work anyway.
1312 block
= find_ram_block(addr
);
1314 memset(block
->idstr
, 0, sizeof(block
->idstr
));
1319 static int memory_try_enable_merging(void *addr
, size_t len
)
1321 if (!machine_mem_merge(current_machine
)) {
1322 /* disabled by the user */
1326 return qemu_madvise(addr
, len
, QEMU_MADV_MERGEABLE
);
1329 /* Only legal before guest might have detected the memory size: e.g. on
1330 * incoming migration, or right after reset.
1332 * As memory core doesn't know how is memory accessed, it is up to
1333 * resize callback to update device state and/or add assertions to detect
1334 * misuse, if necessary.
1336 int qemu_ram_resize(ram_addr_t base
, ram_addr_t newsize
, Error
**errp
)
1338 RAMBlock
*block
= find_ram_block(base
);
1342 newsize
= TARGET_PAGE_ALIGN(newsize
);
1344 if (block
->used_length
== newsize
) {
1348 if (!(block
->flags
& RAM_RESIZEABLE
)) {
1349 error_setg_errno(errp
, EINVAL
,
1350 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1351 " in != 0x" RAM_ADDR_FMT
, block
->idstr
,
1352 newsize
, block
->used_length
);
1356 if (block
->max_length
< newsize
) {
1357 error_setg_errno(errp
, EINVAL
,
1358 "Length too large: %s: 0x" RAM_ADDR_FMT
1359 " > 0x" RAM_ADDR_FMT
, block
->idstr
,
1360 newsize
, block
->max_length
);
1364 cpu_physical_memory_clear_dirty_range(block
->offset
, block
->used_length
);
1365 block
->used_length
= newsize
;
1366 cpu_physical_memory_set_dirty_range(block
->offset
, block
->used_length
);
1367 memory_region_set_size(block
->mr
, newsize
);
1368 if (block
->resized
) {
1369 block
->resized(block
->idstr
, newsize
, block
->host
);
1374 static ram_addr_t
ram_block_add(RAMBlock
*new_block
, Error
**errp
)
1377 RAMBlock
*last_block
= NULL
;
1378 ram_addr_t old_ram_size
, new_ram_size
;
1380 old_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1382 qemu_mutex_lock_ramlist();
1383 new_block
->offset
= find_ram_offset(new_block
->max_length
);
1385 if (!new_block
->host
) {
1386 if (xen_enabled()) {
1387 xen_ram_alloc(new_block
->offset
, new_block
->max_length
,
1390 new_block
->host
= phys_mem_alloc(new_block
->max_length
,
1391 &new_block
->mr
->align
);
1392 if (!new_block
->host
) {
1393 error_setg_errno(errp
, errno
,
1394 "cannot set up guest memory '%s'",
1395 memory_region_name(new_block
->mr
));
1396 qemu_mutex_unlock_ramlist();
1399 memory_try_enable_merging(new_block
->host
, new_block
->max_length
);
1403 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1404 * QLIST (which has an RCU-friendly variant) does not have insertion at
1405 * tail, so save the last element in last_block.
1407 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1409 if (block
->max_length
< new_block
->max_length
) {
1414 QLIST_INSERT_BEFORE_RCU(block
, new_block
, next
);
1415 } else if (last_block
) {
1416 QLIST_INSERT_AFTER_RCU(last_block
, new_block
, next
);
1417 } else { /* list is empty */
1418 QLIST_INSERT_HEAD_RCU(&ram_list
.blocks
, new_block
, next
);
1420 ram_list
.mru_block
= NULL
;
1422 /* Write list before version */
1425 qemu_mutex_unlock_ramlist();
1427 new_ram_size
= last_ram_offset() >> TARGET_PAGE_BITS
;
1429 if (new_ram_size
> old_ram_size
) {
1432 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1433 for (i
= 0; i
< DIRTY_MEMORY_NUM
; i
++) {
1434 ram_list
.dirty_memory
[i
] =
1435 bitmap_zero_extend(ram_list
.dirty_memory
[i
],
1436 old_ram_size
, new_ram_size
);
1439 cpu_physical_memory_set_dirty_range(new_block
->offset
,
1440 new_block
->used_length
);
1442 if (new_block
->host
) {
1443 qemu_ram_setup_dump(new_block
->host
, new_block
->max_length
);
1444 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_HUGEPAGE
);
1445 qemu_madvise(new_block
->host
, new_block
->max_length
, QEMU_MADV_DONTFORK
);
1446 if (kvm_enabled()) {
1447 kvm_setup_guest_memory(new_block
->host
, new_block
->max_length
);
1451 return new_block
->offset
;
1455 ram_addr_t
qemu_ram_alloc_from_file(ram_addr_t size
, MemoryRegion
*mr
,
1456 bool share
, const char *mem_path
,
1459 RAMBlock
*new_block
;
1461 Error
*local_err
= NULL
;
1463 if (xen_enabled()) {
1464 error_setg(errp
, "-mem-path not supported with Xen");
1468 if (phys_mem_alloc
!= qemu_anon_ram_alloc
) {
1470 * file_ram_alloc() needs to allocate just like
1471 * phys_mem_alloc, but we haven't bothered to provide
1475 "-mem-path not supported with this accelerator");
1479 size
= TARGET_PAGE_ALIGN(size
);
1480 new_block
= g_malloc0(sizeof(*new_block
));
1482 new_block
->used_length
= size
;
1483 new_block
->max_length
= size
;
1484 new_block
->flags
= share ? RAM_SHARED
: 0;
1485 new_block
->host
= file_ram_alloc(new_block
, size
,
1487 if (!new_block
->host
) {
1492 addr
= ram_block_add(new_block
, &local_err
);
1495 error_propagate(errp
, local_err
);
1503 ram_addr_t
qemu_ram_alloc_internal(ram_addr_t size
, ram_addr_t max_size
,
1504 void (*resized
)(const char*,
1507 void *host
, bool resizeable
,
1508 MemoryRegion
*mr
, Error
**errp
)
1510 RAMBlock
*new_block
;
1512 Error
*local_err
= NULL
;
1514 size
= TARGET_PAGE_ALIGN(size
);
1515 max_size
= TARGET_PAGE_ALIGN(max_size
);
1516 new_block
= g_malloc0(sizeof(*new_block
));
1518 new_block
->resized
= resized
;
1519 new_block
->used_length
= size
;
1520 new_block
->max_length
= max_size
;
1521 assert(max_size
>= size
);
1523 new_block
->host
= host
;
1525 new_block
->flags
|= RAM_PREALLOC
;
1528 new_block
->flags
|= RAM_RESIZEABLE
;
1530 addr
= ram_block_add(new_block
, &local_err
);
1533 error_propagate(errp
, local_err
);
1539 ram_addr_t
qemu_ram_alloc_from_ptr(ram_addr_t size
, void *host
,
1540 MemoryRegion
*mr
, Error
**errp
)
1542 return qemu_ram_alloc_internal(size
, size
, NULL
, host
, false, mr
, errp
);
1545 ram_addr_t
qemu_ram_alloc(ram_addr_t size
, MemoryRegion
*mr
, Error
**errp
)
1547 return qemu_ram_alloc_internal(size
, size
, NULL
, NULL
, false, mr
, errp
);
1550 ram_addr_t
qemu_ram_alloc_resizeable(ram_addr_t size
, ram_addr_t maxsz
,
1551 void (*resized
)(const char*,
1554 MemoryRegion
*mr
, Error
**errp
)
1556 return qemu_ram_alloc_internal(size
, maxsz
, resized
, NULL
, true, mr
, errp
);
1559 void qemu_ram_free_from_ptr(ram_addr_t addr
)
1563 qemu_mutex_lock_ramlist();
1564 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1565 if (addr
== block
->offset
) {
1566 QLIST_REMOVE_RCU(block
, next
);
1567 ram_list
.mru_block
= NULL
;
1568 /* Write list before version */
1571 g_free_rcu(block
, rcu
);
1575 qemu_mutex_unlock_ramlist();
1578 static void reclaim_ramblock(RAMBlock
*block
)
1580 if (block
->flags
& RAM_PREALLOC
) {
1582 } else if (xen_enabled()) {
1583 xen_invalidate_map_cache_entry(block
->host
);
1585 } else if (block
->fd
>= 0) {
1586 munmap(block
->host
, block
->max_length
);
1590 qemu_anon_ram_free(block
->host
, block
->max_length
);
1595 void qemu_ram_free(ram_addr_t addr
)
1599 qemu_mutex_lock_ramlist();
1600 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1601 if (addr
== block
->offset
) {
1602 QLIST_REMOVE_RCU(block
, next
);
1603 ram_list
.mru_block
= NULL
;
1604 /* Write list before version */
1607 call_rcu(block
, reclaim_ramblock
, rcu
);
1611 qemu_mutex_unlock_ramlist();
1615 void qemu_ram_remap(ram_addr_t addr
, ram_addr_t length
)
1622 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1623 offset
= addr
- block
->offset
;
1624 if (offset
< block
->max_length
) {
1625 vaddr
= ramblock_ptr(block
, offset
);
1626 if (block
->flags
& RAM_PREALLOC
) {
1628 } else if (xen_enabled()) {
1632 if (block
->fd
>= 0) {
1633 flags
|= (block
->flags
& RAM_SHARED ?
1634 MAP_SHARED
: MAP_PRIVATE
);
1635 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1636 flags
, block
->fd
, offset
);
1639 * Remap needs to match alloc. Accelerators that
1640 * set phys_mem_alloc never remap. If they did,
1641 * we'd need a remap hook here.
1643 assert(phys_mem_alloc
== qemu_anon_ram_alloc
);
1645 flags
|= MAP_PRIVATE
| MAP_ANONYMOUS
;
1646 area
= mmap(vaddr
, length
, PROT_READ
| PROT_WRITE
,
1649 if (area
!= vaddr
) {
1650 fprintf(stderr
, "Could not remap addr: "
1651 RAM_ADDR_FMT
"@" RAM_ADDR_FMT
"\n",
1655 memory_try_enable_merging(vaddr
, length
);
1656 qemu_ram_setup_dump(vaddr
, length
);
1661 #endif /* !_WIN32 */
1663 int qemu_get_ram_fd(ram_addr_t addr
)
1669 block
= qemu_get_ram_block(addr
);
1675 void *qemu_get_ram_block_host_ptr(ram_addr_t addr
)
1681 block
= qemu_get_ram_block(addr
);
1682 ptr
= ramblock_ptr(block
, 0);
1687 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1688 * This should not be used for general purpose DMA. Use address_space_map
1689 * or address_space_rw instead. For local memory (e.g. video ram) that the
1690 * device owns, use memory_region_get_ram_ptr.
1692 * By the time this function returns, the returned pointer is not protected
1693 * by RCU anymore. If the caller is not within an RCU critical section and
1694 * does not hold the iothread lock, it must have other means of protecting the
1695 * pointer, such as a reference to the region that includes the incoming
1698 void *qemu_get_ram_ptr(ram_addr_t addr
)
1704 block
= qemu_get_ram_block(addr
);
1706 if (xen_enabled() && block
->host
== NULL
) {
1707 /* We need to check if the requested address is in the RAM
1708 * because we don't want to map the entire memory in QEMU.
1709 * In that case just map until the end of the page.
1711 if (block
->offset
== 0) {
1712 ptr
= xen_map_cache(addr
, 0, 0);
1716 block
->host
= xen_map_cache(block
->offset
, block
->max_length
, 1);
1718 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1725 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1726 * but takes a size argument.
1728 * By the time this function returns, the returned pointer is not protected
1729 * by RCU anymore. If the caller is not within an RCU critical section and
1730 * does not hold the iothread lock, it must have other means of protecting the
1731 * pointer, such as a reference to the region that includes the incoming
1734 static void *qemu_ram_ptr_length(ram_addr_t addr
, hwaddr
*size
)
1740 if (xen_enabled()) {
1741 return xen_map_cache(addr
, *size
, 1);
1745 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
1746 if (addr
- block
->offset
< block
->max_length
) {
1747 if (addr
- block
->offset
+ *size
> block
->max_length
)
1748 *size
= block
->max_length
- addr
+ block
->offset
;
1749 ptr
= ramblock_ptr(block
, addr
- block
->offset
);
1755 fprintf(stderr
, "Bad ram offset %" PRIx64
"\n", (uint64_t)addr
);
1760 /* Some of the softmmu routines need to translate from a host pointer
1761 * (typically a TLB entry) back to a ram offset.
1763 * By the time this function returns, the returned pointer is not protected
1764 * by RCU anymore. If the caller is not within an RCU critical section and
1765 * does not hold the iothread lock, it must have other means of protecting the
1766 * pointer, such as a reference to the region that includes the incoming
1769 MemoryRegion
*qemu_ram_addr_from_host(void *ptr
, ram_addr_t
*ram_addr
)
1772 uint8_t *host
= ptr
;
1775 if (xen_enabled()) {
1777 *ram_addr
= xen_ram_addr_from_mapcache(ptr
);
1778 mr
= qemu_get_ram_block(*ram_addr
)->mr
;
1784 block
= atomic_rcu_read(&ram_list
.mru_block
);
1785 if (block
&& block
->host
&& host
- block
->host
< block
->max_length
) {
1789 QLIST_FOREACH_RCU(block
, &ram_list
.blocks
, next
) {
        /* This case appears when the block is not mapped. */
1791 if (block
->host
== NULL
) {
1794 if (host
- block
->host
< block
->max_length
) {
1803 *ram_addr
= block
->offset
+ (host
- block
->host
);
1809 static void notdirty_mem_write(void *opaque
, hwaddr ram_addr
,
1810 uint64_t val
, unsigned size
)
1812 if (!cpu_physical_memory_get_dirty_flag(ram_addr
, DIRTY_MEMORY_CODE
)) {
1813 tb_invalidate_phys_page_fast(ram_addr
, size
);
1817 stb_p(qemu_get_ram_ptr(ram_addr
), val
);
1820 stw_p(qemu_get_ram_ptr(ram_addr
), val
);
1823 stl_p(qemu_get_ram_ptr(ram_addr
), val
);
1828 cpu_physical_memory_set_dirty_range_nocode(ram_addr
, size
);
1829 /* we remove the notdirty callback only if the code has been
1831 if (!cpu_physical_memory_is_clean(ram_addr
)) {
1832 CPUArchState
*env
= current_cpu
->env_ptr
;
1833 tlb_set_dirty(env
, current_cpu
->mem_io_vaddr
);
1837 static bool notdirty_mem_accepts(void *opaque
, hwaddr addr
,
1838 unsigned size
, bool is_write
)
1843 static const MemoryRegionOps notdirty_mem_ops
= {
1844 .write
= notdirty_mem_write
,
1845 .valid
.accepts
= notdirty_mem_accepts
,
1846 .endianness
= DEVICE_NATIVE_ENDIAN
,
1849 /* Generate a debug exception if a watchpoint has been hit. */
1850 static void check_watchpoint(int offset
, int len
, int flags
)
1852 CPUState
*cpu
= current_cpu
;
1853 CPUArchState
*env
= cpu
->env_ptr
;
1854 target_ulong pc
, cs_base
;
1859 if (cpu
->watchpoint_hit
) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
1863 cpu_interrupt(cpu
, CPU_INTERRUPT_DEBUG
);
1866 vaddr
= (cpu
->mem_io_vaddr
& TARGET_PAGE_MASK
) + offset
;
1867 QTAILQ_FOREACH(wp
, &cpu
->watchpoints
, entry
) {
1868 if (cpu_watchpoint_address_matches(wp
, vaddr
, len
)
1869 && (wp
->flags
& flags
)) {
1870 if (flags
== BP_MEM_READ
) {
1871 wp
->flags
|= BP_WATCHPOINT_HIT_READ
;
1873 wp
->flags
|= BP_WATCHPOINT_HIT_WRITE
;
1875 wp
->hitaddr
= vaddr
;
1876 if (!cpu
->watchpoint_hit
) {
1877 cpu
->watchpoint_hit
= wp
;
1878 tb_check_watchpoint(cpu
);
1879 if (wp
->flags
& BP_STOP_BEFORE_ACCESS
) {
1880 cpu
->exception_index
= EXCP_DEBUG
;
1883 cpu_get_tb_cpu_state(env
, &pc
, &cs_base
, &cpu_flags
);
1884 tb_gen_code(cpu
, pc
, cs_base
, cpu_flags
, 1);
1885 cpu_resume_from_signal(cpu
, NULL
);
1889 wp
->flags
&= ~BP_WATCHPOINT_HIT
;
1894 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1895 so these check for a hit then pass through to the normal out-of-line
1897 static uint64_t watch_mem_read(void *opaque
, hwaddr addr
,
1900 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, BP_MEM_READ
);
1902 case 1: return ldub_phys(&address_space_memory
, addr
);
1903 case 2: return lduw_phys(&address_space_memory
, addr
);
1904 case 4: return ldl_phys(&address_space_memory
, addr
);
1909 static void watch_mem_write(void *opaque
, hwaddr addr
,
1910 uint64_t val
, unsigned size
)
1912 check_watchpoint(addr
& ~TARGET_PAGE_MASK
, size
, BP_MEM_WRITE
);
1915 stb_phys(&address_space_memory
, addr
, val
);
1918 stw_phys(&address_space_memory
, addr
, val
);
1921 stl_phys(&address_space_memory
, addr
, val
);
1927 static const MemoryRegionOps watch_mem_ops
= {
1928 .read
= watch_mem_read
,
1929 .write
= watch_mem_write
,
1930 .endianness
= DEVICE_NATIVE_ENDIAN
,
1933 static uint64_t subpage_read(void *opaque
, hwaddr addr
,
1936 subpage_t
*subpage
= opaque
;
1939 #if defined(DEBUG_SUBPAGE)
1940 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
"\n", __func__
,
1941 subpage
, len
, addr
);
1943 address_space_read(subpage
->as
, addr
+ subpage
->base
, buf
, len
);
1958 static void subpage_write(void *opaque
, hwaddr addr
,
1959 uint64_t value
, unsigned len
)
1961 subpage_t
*subpage
= opaque
;
1964 #if defined(DEBUG_SUBPAGE)
1965 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1966 " value %"PRIx64
"\n",
1967 __func__
, subpage
, len
, addr
, value
);
1985 address_space_write(subpage
->as
, addr
+ subpage
->base
, buf
, len
);
1988 static bool subpage_accepts(void *opaque
, hwaddr addr
,
1989 unsigned len
, bool is_write
)
1991 subpage_t
*subpage
= opaque
;
1992 #if defined(DEBUG_SUBPAGE)
1993 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx
"\n",
1994 __func__
, subpage
, is_write ?
'w' : 'r', len
, addr
);
1997 return address_space_access_valid(subpage
->as
, addr
+ subpage
->base
,
2001 static const MemoryRegionOps subpage_ops
= {
2002 .read
= subpage_read
,
2003 .write
= subpage_write
,
2004 .impl
.min_access_size
= 1,
2005 .impl
.max_access_size
= 8,
2006 .valid
.min_access_size
= 1,
2007 .valid
.max_access_size
= 8,
2008 .valid
.accepts
= subpage_accepts
,
2009 .endianness
= DEVICE_NATIVE_ENDIAN
,
2012 static int subpage_register (subpage_t
*mmio
, uint32_t start
, uint32_t end
,
2017 if (start
>= TARGET_PAGE_SIZE
|| end
>= TARGET_PAGE_SIZE
)
2019 idx
= SUBPAGE_IDX(start
);
2020 eidx
= SUBPAGE_IDX(end
);
2021 #if defined(DEBUG_SUBPAGE)
2022 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2023 __func__
, mmio
, start
, end
, idx
, eidx
, section
);
2025 for (; idx
<= eidx
; idx
++) {
2026 mmio
->sub_section
[idx
] = section
;
2032 static subpage_t
*subpage_init(AddressSpace
*as
, hwaddr base
)
2036 mmio
= g_malloc0(sizeof(subpage_t
));
2040 memory_region_init_io(&mmio
->iomem
, NULL
, &subpage_ops
, mmio
,
2041 NULL
, TARGET_PAGE_SIZE
);
2042 mmio
->iomem
.subpage
= true;
2043 #if defined(DEBUG_SUBPAGE)
2044 printf("%s: %p base " TARGET_FMT_plx
" len %08x\n", __func__
,
2045 mmio
, base
, TARGET_PAGE_SIZE
);
2047 subpage_register(mmio
, 0, TARGET_PAGE_SIZE
-1, PHYS_SECTION_UNASSIGNED
);
2052 static uint16_t dummy_section(PhysPageMap
*map
, AddressSpace
*as
,
2056 MemoryRegionSection section
= {
2057 .address_space
= as
,
2059 .offset_within_address_space
= 0,
2060 .offset_within_region
= 0,
2061 .size
= int128_2_64(),
2064 return phys_section_add(map
, §ion
);
2067 MemoryRegion
*iotlb_to_region(CPUState
*cpu
, hwaddr index
)
2069 AddressSpaceDispatch
*d
= atomic_rcu_read(&cpu
->memory_dispatch
);
2070 MemoryRegionSection
*sections
= d
->map
.sections
;
2072 return sections
[index
& ~TARGET_PAGE_MASK
].mr
;
2075 static void io_mem_init(void)
2077 memory_region_init_io(&io_mem_rom
, NULL
, &unassigned_mem_ops
, NULL
, NULL
, UINT64_MAX
);
2078 memory_region_init_io(&io_mem_unassigned
, NULL
, &unassigned_mem_ops
, NULL
,
2080 memory_region_init_io(&io_mem_notdirty
, NULL
, ¬dirty_mem_ops
, NULL
,
2082 memory_region_init_io(&io_mem_watch
, NULL
, &watch_mem_ops
, NULL
,
2086 static void mem_begin(MemoryListener
*listener
)
2088 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2089 AddressSpaceDispatch
*d
= g_new0(AddressSpaceDispatch
, 1);
2092 n
= dummy_section(&d
->map
, as
, &io_mem_unassigned
);
2093 assert(n
== PHYS_SECTION_UNASSIGNED
);
2094 n
= dummy_section(&d
->map
, as
, &io_mem_notdirty
);
2095 assert(n
== PHYS_SECTION_NOTDIRTY
);
2096 n
= dummy_section(&d
->map
, as
, &io_mem_rom
);
2097 assert(n
== PHYS_SECTION_ROM
);
2098 n
= dummy_section(&d
->map
, as
, &io_mem_watch
);
2099 assert(n
== PHYS_SECTION_WATCH
);
2101 d
->phys_map
= (PhysPageEntry
) { .ptr
= PHYS_MAP_NODE_NIL
, .skip
= 1 };
2103 as
->next_dispatch
= d
;
2106 static void address_space_dispatch_free(AddressSpaceDispatch
*d
)
2108 phys_sections_free(&d
->map
);
2112 static void mem_commit(MemoryListener
*listener
)
2114 AddressSpace
*as
= container_of(listener
, AddressSpace
, dispatch_listener
);
2115 AddressSpaceDispatch
*cur
= as
->dispatch
;
2116 AddressSpaceDispatch
*next
= as
->next_dispatch
;
2118 phys_page_compact_all(next
, next
->map
.nodes_nb
);
2120 atomic_rcu_set(&as
->dispatch
, next
);
2122 call_rcu(cur
, address_space_dispatch_free
, rcu
);
2126 static void tcg_commit(MemoryListener
*listener
)
2130 /* since each CPU stores ram addresses in its TLB cache, we must
2131 reset the modified entries */
2134 /* FIXME: Disentangle the cpu.h circular files deps so we can
2135 directly get the right CPU from listener. */
2136 if (cpu
->tcg_as_listener
!= listener
) {
2139 cpu_reload_memory_map(cpu
);
2143 static void core_log_global_start(MemoryListener
*listener
)
2145 cpu_physical_memory_set_dirty_tracking(true);
2148 static void core_log_global_stop(MemoryListener
*listener
)
2150 cpu_physical_memory_set_dirty_tracking(false);
2153 static MemoryListener core_memory_listener
= {
2154 .log_global_start
= core_log_global_start
,
2155 .log_global_stop
= core_log_global_stop
,
2159 void address_space_init_dispatch(AddressSpace
*as
)
2161 as
->dispatch
= NULL
;
2162 as
->dispatch_listener
= (MemoryListener
) {
2164 .commit
= mem_commit
,
2165 .region_add
= mem_add
,
2166 .region_nop
= mem_add
,
2169 memory_listener_register(&as
->dispatch_listener
, as
);
2172 void address_space_unregister(AddressSpace
*as
)
2174 memory_listener_unregister(&as
->dispatch_listener
);
2177 void address_space_destroy_dispatch(AddressSpace
*as
)
2179 AddressSpaceDispatch
*d
= as
->dispatch
;
2181 atomic_rcu_set(&as
->dispatch
, NULL
);
2183 call_rcu(d
, address_space_dispatch_free
, rcu
);
2187 static void memory_map_init(void)
2189 system_memory
= g_malloc(sizeof(*system_memory
));
2191 memory_region_init(system_memory
, NULL
, "system", UINT64_MAX
);
2192 address_space_init(&address_space_memory
, system_memory
, "memory");
2194 system_io
= g_malloc(sizeof(*system_io
));
2195 memory_region_init_io(system_io
, NULL
, &unassigned_io_ops
, NULL
, "io",
2197 address_space_init(&address_space_io
, system_io
, "I/O");
2199 memory_listener_register(&core_memory_listener
, &address_space_memory
);
2202 MemoryRegion
*get_system_memory(void)
2204 return system_memory
;
2207 MemoryRegion
*get_system_io(void)
2212 #endif /* !defined(CONFIG_USER_ONLY) */
2214 /* physical memory access (slow version, mainly for debug) */
2215 #if defined(CONFIG_USER_ONLY)
2216 int cpu_memory_rw_debug(CPUState
*cpu
, target_ulong addr
,
2217 uint8_t *buf
, int len
, int is_write
)
2224 page
= addr
& TARGET_PAGE_MASK
;
2225 l
= (page
+ TARGET_PAGE_SIZE
) - addr
;
2228 flags
= page_get_flags(page
);
2229 if (!(flags
& PAGE_VALID
))
2232 if (!(flags
& PAGE_WRITE
))
2234 /* XXX: this code should not depend on lock_user */
2235 if (!(p
= lock_user(VERIFY_WRITE
, addr
, l
, 0)))
2238 unlock_user(p
, addr
, l
);
2240 if (!(flags
& PAGE_READ
))
2242 /* XXX: this code should not depend on lock_user */
2243 if (!(p
= lock_user(VERIFY_READ
, addr
, l
, 1)))
2246 unlock_user(p
, addr
, 0);
2257 static void invalidate_and_set_dirty(hwaddr addr
,
2260 if (cpu_physical_memory_range_includes_clean(addr
, length
)) {
2261 tb_invalidate_phys_range(addr
, addr
+ length
, 0);
2262 cpu_physical_memory_set_dirty_range_nocode(addr
, length
);
2264 xen_modified_memory(addr
, length
);
static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
    unsigned access_size_max = mr->ops->valid.max_access_size;

    /* Regions are assumed to support 1-4 byte accesses unless
       otherwise specified.  */
    if (access_size_max == 0) {
        access_size_max = 4;
    }

    /* Bound the maximum access by the alignment of the address.  */
    if (!mr->ops->impl.unaligned) {
        unsigned align_size_max = addr & -addr;
        if (align_size_max != 0 && align_size_max < access_size_max) {
            access_size_max = align_size_max;
        }
    }

    /* Don't attempt accesses larger than the maximum.  */
    if (l > access_size_max) {
        l = access_size_max;
    }
    if (l & (l - 1)) {
        l = 1 << (qemu_fls(l) - 1);
    }

    return l;
}
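
/*
 * Worked example (illustrative, not part of the original source): for a
 * region that leaves valid.max_access_size at 0, the maximum defaults to 4
 * bytes.  An 8-byte request at address 0x1004 is therefore capped to 4 (the
 * alignment bound addr & -addr = 4 agrees), while a 3-byte request is not a
 * power of two and is rounded down to 2, leaving the caller to finish with a
 * smaller access on its next loop iteration.
 */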
2296 bool address_space_rw(AddressSpace
*as
, hwaddr addr
, uint8_t *buf
,
2297 int len
, bool is_write
)
2308 mr
= address_space_translate(as
, addr
, &addr1
, &l
, is_write
);
2311 if (!memory_access_is_direct(mr
, is_write
)) {
2312 l
= memory_access_size(mr
, l
, addr1
);
2313 /* XXX: could force current_cpu to NULL to avoid
2317 /* 64 bit write access */
2319 error
|= io_mem_write(mr
, addr1
, val
, 8);
2322 /* 32 bit write access */
2324 error
|= io_mem_write(mr
, addr1
, val
, 4);
2327 /* 16 bit write access */
2329 error
|= io_mem_write(mr
, addr1
, val
, 2);
2332 /* 8 bit write access */
2334 error
|= io_mem_write(mr
, addr1
, val
, 1);
2340 addr1
+= memory_region_get_ram_addr(mr
);
2342 ptr
= qemu_get_ram_ptr(addr1
);
2343 memcpy(ptr
, buf
, l
);
2344 invalidate_and_set_dirty(addr1
, l
);
2347 if (!memory_access_is_direct(mr
, is_write
)) {
2349 l
= memory_access_size(mr
, l
, addr1
);
2352 /* 64 bit read access */
2353 error
|= io_mem_read(mr
, addr1
, &val
, 8);
2357 /* 32 bit read access */
2358 error
|= io_mem_read(mr
, addr1
, &val
, 4);
2362 /* 16 bit read access */
2363 error
|= io_mem_read(mr
, addr1
, &val
, 2);
2367 /* 8 bit read access */
2368 error
|= io_mem_read(mr
, addr1
, &val
, 1);
2376 ptr
= qemu_get_ram_ptr(mr
->ram_addr
+ addr1
);
2377 memcpy(buf
, ptr
, l
);
2388 bool address_space_write(AddressSpace
*as
, hwaddr addr
,
2389 const uint8_t *buf
, int len
)
2391 return address_space_rw(as
, addr
, (uint8_t *)buf
, len
, true);
2394 bool address_space_read(AddressSpace
*as
, hwaddr addr
, uint8_t *buf
, int len
)
2396 return address_space_rw(as
, addr
, buf
, len
, false);
2400 void cpu_physical_memory_rw(hwaddr addr
, uint8_t *buf
,
2401 int len
, int is_write
)
2403 address_space_rw(&address_space_memory
, addr
, buf
, len
, is_write
);
2406 enum write_rom_type
{
2411 static inline void cpu_physical_memory_write_rom_internal(AddressSpace
*as
,
2412 hwaddr addr
, const uint8_t *buf
, int len
, enum write_rom_type type
)
2421 mr
= address_space_translate(as
, addr
, &addr1
, &l
, true);
2423 if (!(memory_region_is_ram(mr
) ||
2424 memory_region_is_romd(mr
))) {
2427 addr1
+= memory_region_get_ram_addr(mr
);
2429 ptr
= qemu_get_ram_ptr(addr1
);
2432 memcpy(ptr
, buf
, l
);
2433 invalidate_and_set_dirty(addr1
, l
);
2436 flush_icache_range((uintptr_t)ptr
, (uintptr_t)ptr
+ l
);
2446 /* used for ROM loading : can write in RAM and ROM */
2447 void cpu_physical_memory_write_rom(AddressSpace
*as
, hwaddr addr
,
2448 const uint8_t *buf
, int len
)
2450 cpu_physical_memory_write_rom_internal(as
, addr
, buf
, len
, WRITE_DATA
);
2453 void cpu_flush_icache_range(hwaddr start
, int len
)
2456 * This function should do the same thing as an icache flush that was
2457 * triggered from within the guest. For TCG we are always cache coherent,
2458 * so there is no need to flush anything. For KVM / Xen we need to flush
2459 * the host's instruction cache at least.
2461 if (tcg_enabled()) {
2465 cpu_physical_memory_write_rom_internal(&address_space_memory
,
2466 start
, NULL
, len
, FLUSH_CACHE
);
2477 static BounceBuffer bounce
;
2479 typedef struct MapClient
{
2481 QLIST_ENTRY(MapClient
) link
;
2484 QemuMutex map_client_list_lock
;
2485 static QLIST_HEAD(map_client_list
, MapClient
) map_client_list
2486 = QLIST_HEAD_INITIALIZER(map_client_list
);
2488 static void cpu_unregister_map_client_do(MapClient
*client
)
2490 QLIST_REMOVE(client
, link
);
2494 static void cpu_notify_map_clients_locked(void)
2498 while (!QLIST_EMPTY(&map_client_list
)) {
2499 client
= QLIST_FIRST(&map_client_list
);
2500 qemu_bh_schedule(client
->bh
);
2501 cpu_unregister_map_client_do(client
);
2505 void cpu_register_map_client(QEMUBH
*bh
)
2507 MapClient
*client
= g_malloc(sizeof(*client
));
2509 qemu_mutex_lock(&map_client_list_lock
);
2511 QLIST_INSERT_HEAD(&map_client_list
, client
, link
);
2512 if (!atomic_read(&bounce
.in_use
)) {
2513 cpu_notify_map_clients_locked();
2515 qemu_mutex_unlock(&map_client_list_lock
);
2518 void cpu_exec_init_all(void)
2520 qemu_mutex_init(&ram_list
.mutex
);
2523 qemu_mutex_init(&map_client_list_lock
);
2526 void cpu_unregister_map_client(QEMUBH
*bh
)
2530 qemu_mutex_lock(&map_client_list_lock
);
2531 QLIST_FOREACH(client
, &map_client_list
, link
) {
2532 if (client
->bh
== bh
) {
2533 cpu_unregister_map_client_do(client
);
2537 qemu_mutex_unlock(&map_client_list_lock
);
2540 static void cpu_notify_map_clients(void)
2542 qemu_mutex_lock(&map_client_list_lock
);
2543 cpu_notify_map_clients_locked();
2544 qemu_mutex_unlock(&map_client_list_lock
);
bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
{
    MemoryRegion *mr;
    hwaddr l, xlat;

    while (len > 0) {
        l = len;
        mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (!memory_access_is_direct(mr, is_write)) {
            l = memory_access_size(mr, l, addr);
            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                return false;
            }
        }

        len -= l;
        addr += l;
    }
    return true;
}
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    hwaddr len = *plen;
    hwaddr done = 0;
    hwaddr l, xlat, base;
    MemoryRegion *mr, *this_mr;
    ram_addr_t raddr;

    if (len == 0) {
        return NULL;
    }

    l = len;
    mr = address_space_translate(as, addr, &xlat, &l, is_write);
    if (!memory_access_is_direct(mr, is_write)) {
        if (atomic_xchg(&bounce.in_use, true)) {
            return NULL;
        }
        /* Avoid unbounded allocations */
        l = MIN(l, TARGET_PAGE_SIZE);
        bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
        bounce.addr = addr;
        bounce.len = l;

        memory_region_ref(mr);
        bounce.mr = mr;
        if (!is_write) {
            address_space_read(as, addr, bounce.buffer, l);
        }

        *plen = l;
        return bounce.buffer;
    }

    base = xlat;
    raddr = memory_region_get_ram_addr(mr);

    for (;;) {
        len -= l;
        addr += l;
        done += l;
        if (len == 0) {
            break;
        }

        l = len;
        this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
        if (this_mr != mr || xlat != base + done) {
            break;
        }
    }

    memory_region_ref(mr);
    *plen = done;
    return qemu_ram_ptr_length(raddr + base, plen);
}
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1. access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        MemoryRegion *mr;
        ram_addr_t addr1;

        mr = qemu_ram_addr_from_host(buffer, &addr1);
        assert(mr != NULL);
        if (is_write) {
            invalidate_and_set_dirty(addr1, access_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        memory_region_unref(mr);
        return;
    }
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    memory_region_unref(bounce.mr);
    atomic_mb_set(&bounce.in_use, false);
    cpu_notify_map_clients();
}
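
/* Illustrative sketch: the usual zero-copy pattern for a device model that
 * wants direct access to guest memory.  example_dma_write() and its guest
 * address/length parameters are made-up names, not part of this file. */
static void example_dma_write(AddressSpace *as, hwaddr guest_addr,
                              const uint8_t *data, hwaddr size)
{
    hwaddr plen = size;
    void *host = address_space_map(as, guest_addr, &plen, true);

    if (!host) {
        /* Resources exhausted: fall back to address_space_write() or
         * register a map client and retry later. */
        return;
    }

    /* plen may be smaller than requested; only touch what was mapped. */
    memcpy(host, data, plen);

    /* access_len tells unmap how much memory to mark dirty. */
    address_space_unmap(as, host, plen, true, plen);
}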
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}

void cpu_physical_memory_unmap(void *buffer, hwaddr len,
                               int is_write, hwaddr access_len)
{
    address_space_unmap(&address_space_memory, buffer, len, is_write,
                        access_len);
}
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l, false);
    if (l < 4 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}
uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
{
    return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}
/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 8;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 8 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 8);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap64(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap64(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}
uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
{
    return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}
uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
{
    uint8_t val;
    address_space_rw(as, addr, &val, 1, 0);
    return val;
}
/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 false);
    if (l < 2 || !memory_access_is_direct(mr, false)) {
        /* I/O case */
        io_mem_read(mr, addr1, &val, 2);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                & TARGET_PAGE_MASK)
                               + addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}
uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
{
    return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
}
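
/* Illustrative sketch: reading fixed-width, fixed-endian fields from guest
 * memory, e.g. a little-endian descriptor.  example_read_descriptor() and
 * the field layout are made-up examples, not part of this file. */
static void example_read_descriptor(AddressSpace *as, hwaddr desc)
{
    uint64_t buf_addr = ldq_le_phys(as, desc);       /* 64-bit LE field */
    uint32_t buf_len  = ldl_le_phys(as, desc + 8);   /* 32-bit LE field */
    uint16_t flags    = lduw_le_phys(as, desc + 12); /* 16-bit LE field */
    uint8_t  status   = ldub_phys(as, desc + 14);    /* single byte     */

    (void)buf_addr; (void)buf_len; (void)flags; (void)status;
}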
/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
        io_mem_write(mr, addr1, val, 4);
    } else {
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (cpu_physical_memory_is_clean(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
            }
        }
    }
}
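
/* Illustrative sketch: the typical user of stl_phys_notdirty() is a softmmu
 * page-table walker setting bookkeeping bits in a guest PTE without flagging
 * the page for code invalidation.  example_set_accessed_bit() and
 * EXAMPLE_PTE_ACCESSED are made-up names, not part of this file. */
#define EXAMPLE_PTE_ACCESSED 0x20

static void example_set_accessed_bit(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        stl_phys_notdirty(as, pte_addr, pte | EXAMPLE_PTE_ACCESSED);
    }
}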
/* warning: addr must be aligned */
static inline void stl_phys_internal(AddressSpace *as,
                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 4;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l,
                                 true);
    if (l < 4 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write(mr, addr1, val, 4);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 4);
    }
}
void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
}
void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    uint8_t v = val;
    address_space_rw(as, addr, &v, 1, 1);
}
/* warning: addr must be aligned */
static inline void stw_phys_internal(AddressSpace *as,
                                     hwaddr addr, uint32_t val,
                                     enum device_endian endian)
{
    uint8_t *ptr;
    MemoryRegion *mr;
    hwaddr l = 2;
    hwaddr addr1;

    mr = address_space_translate(as, addr, &addr1, &l, true);
    if (l < 2 || !memory_access_is_direct(mr, true)) {
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write(mr, addr1, val, 2);
    } else {
        /* RAM case */
        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        invalidate_and_set_dirty(addr1, 2);
    }
}
void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
{
    stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
}
void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}

void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    address_space_rw(as, addr, (void *) &val, 8, 1);
}
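
/* Illustrative sketch, mirroring the load example above: writing fixed-endian
 * fields of a guest-visible structure.  example_write_descriptor() and the
 * field layout/values are made-up examples, not part of this file. */
static void example_write_descriptor(AddressSpace *as, hwaddr desc)
{
    stq_le_phys(as, desc, 0x100000);     /* 64-bit LE buffer address */
    stl_le_phys(as, desc + 8, 4096);     /* 32-bit LE buffer length  */
    stw_le_phys(as, desc + 12, 0x1);     /* 16-bit LE flags          */
    stb_phys(as, desc + 14, 0);          /* status byte              */
}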
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(cpu, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write) {
            cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
        } else {
            address_space_rw(cpu->as, phys_addr, buf, l, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
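
/* Illustrative sketch: the kind of call a debugger front end (e.g. the
 * gdbstub) makes to read guest memory by virtual address, letting
 * cpu_memory_rw_debug() handle per-page translation and the ROM-friendly
 * write path.  example_read_guest_virtual() is a made-up name. */
static int example_read_guest_virtual(CPUState *cpu, target_ulong vaddr,
                                      uint8_t *buf, int len)
{
    /* Returns -1 if some page in the range is unmapped. */
    return cpu_memory_rw_debug(cpu, vaddr, buf, len, 0);
}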
/*
 * A helper function for the _utterly broken_ virtio device model to find out
 * if it's running on a big endian machine. Don't do this at home kids!
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    return true;
#else
    return false;
#endif
}
#ifndef CONFIG_USER_ONLY
bool cpu_physical_memory_is_io(hwaddr phys_addr)
{
    MemoryRegion *mr;
    hwaddr l = 1;

    mr = address_space_translate(&address_space_memory,
                                 phys_addr, &phys_addr, &l, false);

    return !(memory_region_is_ram(mr) ||
             memory_region_is_romd(mr));
}
void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
{
    RAMBlock *block;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        func(block->host, block->offset, block->used_length, opaque);