i386: Fix pkg_id offset for EPYC cpu models
[qemu.git] / linux-user / mmap.c
1 /*
2 * mmap support for qemu
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "qemu/osdep.h"
20 #include "trace.h"
21 #include "exec/log.h"
22 #include "qemu.h"
23
/* Process-wide mutex guarding changes to the guest memory map. */
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Per-thread nesting depth of mmap_lock() calls. */
static __thread int mmap_lock_count;

/*
 * Acquire the mmap lock.  The lock nests per thread: only the outermost
 * call actually takes the mutex, inner calls just bump the depth counter.
 */
void mmap_lock(void)
{
    mmap_lock_count += 1;
    if (mmap_lock_count == 1) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

/*
 * Release one nesting level of the mmap lock; the mutex itself is dropped
 * when the depth counter returns to zero.
 */
void mmap_unlock(void)
{
    mmap_lock_count -= 1;
    if (mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/* Report whether the calling thread currently holds the mmap lock. */
bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
45
46 /* Grab lock to make sure things are in a consistent state after fork(). */
47 void mmap_fork_start(void)
48 {
49 if (mmap_lock_count)
50 abort();
51 pthread_mutex_lock(&mmap_mutex);
52 }
53
54 void mmap_fork_end(int child)
55 {
56 if (child)
57 pthread_mutex_init(&mmap_mutex, NULL);
58 else
59 pthread_mutex_unlock(&mmap_mutex);
60 }
61
62 /* NOTE: all the constants are the HOST ones, but addresses are target. */
63 int target_mprotect(abi_ulong start, abi_ulong len, int prot)
64 {
65 abi_ulong end, host_start, host_end, addr;
66 int prot1, ret;
67
68 trace_target_mprotect(start, len, prot);
69
70 if ((start & ~TARGET_PAGE_MASK) != 0)
71 return -TARGET_EINVAL;
72 len = TARGET_PAGE_ALIGN(len);
73 end = start + len;
74 if (!guest_range_valid(start, len)) {
75 return -TARGET_ENOMEM;
76 }
77 prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
78 if (len == 0)
79 return 0;
80
81 mmap_lock();
82 host_start = start & qemu_host_page_mask;
83 host_end = HOST_PAGE_ALIGN(end);
84 if (start > host_start) {
85 /* handle host page containing start */
86 prot1 = prot;
87 for(addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
88 prot1 |= page_get_flags(addr);
89 }
90 if (host_end == host_start + qemu_host_page_size) {
91 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
92 prot1 |= page_get_flags(addr);
93 }
94 end = host_end;
95 }
96 ret = mprotect(g2h(host_start), qemu_host_page_size, prot1 & PAGE_BITS);
97 if (ret != 0)
98 goto error;
99 host_start += qemu_host_page_size;
100 }
101 if (end < host_end) {
102 prot1 = prot;
103 for(addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
104 prot1 |= page_get_flags(addr);
105 }
106 ret = mprotect(g2h(host_end - qemu_host_page_size), qemu_host_page_size,
107 prot1 & PAGE_BITS);
108 if (ret != 0)
109 goto error;
110 host_end -= qemu_host_page_size;
111 }
112
113 /* handle the pages in the middle */
114 if (host_start < host_end) {
115 ret = mprotect(g2h(host_start), host_end - host_start, prot);
116 if (ret != 0)
117 goto error;
118 }
119 page_set_flags(start, start + len, prot | PAGE_VALID);
120 mmap_unlock();
121 return 0;
122 error:
123 mmap_unlock();
124 return ret;
125 }
126
127 /* map an incomplete host page */
128 static int mmap_frag(abi_ulong real_start,
129 abi_ulong start, abi_ulong end,
130 int prot, int flags, int fd, abi_ulong offset)
131 {
132 abi_ulong real_end, addr;
133 void *host_start;
134 int prot1, prot_new;
135
136 real_end = real_start + qemu_host_page_size;
137 host_start = g2h(real_start);
138
139 /* get the protection of the target pages outside the mapping */
140 prot1 = 0;
141 for(addr = real_start; addr < real_end; addr++) {
142 if (addr < start || addr >= end)
143 prot1 |= page_get_flags(addr);
144 }
145
146 if (prot1 == 0) {
147 /* no page was there, so we allocate one */
148 void *p = mmap(host_start, qemu_host_page_size, prot,
149 flags | MAP_ANONYMOUS, -1, 0);
150 if (p == MAP_FAILED)
151 return -1;
152 prot1 = prot;
153 }
154 prot1 &= PAGE_BITS;
155
156 prot_new = prot | prot1;
157 if (!(flags & MAP_ANONYMOUS)) {
158 /* msync() won't work here, so we return an error if write is
159 possible while it is a shared mapping */
160 if ((flags & MAP_TYPE) == MAP_SHARED &&
161 (prot & PROT_WRITE))
162 return -1;
163
164 /* adjust protection to be able to read */
165 if (!(prot1 & PROT_WRITE))
166 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
167
168 /* read the corresponding file data */
169 if (pread(fd, g2h(start), end - start, offset) == -1)
170 return -1;
171
172 /* put final protection */
173 if (prot_new != (prot1 | PROT_WRITE))
174 mprotect(host_start, qemu_host_page_size, prot_new);
175 } else {
176 if (prot_new != prot1) {
177 mprotect(host_start, qemu_host_page_size, prot_new);
178 }
179 if (prot_new & PROT_WRITE) {
180 memset(g2h(start), 0, end - start);
181 }
182 }
183 return 0;
184 }
185
186 #if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
187 #ifdef TARGET_AARCH64
188 # define TASK_UNMAPPED_BASE 0x5500000000
189 #else
190 # define TASK_UNMAPPED_BASE (1ul << 38)
191 #endif
192 #else
193 # define TASK_UNMAPPED_BASE 0x40000000
194 #endif
195 abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
196
197 unsigned long last_brk;
198
199 /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
200 of guest address space. */
201 static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
202 abi_ulong align)
203 {
204 abi_ulong addr, end_addr, incr = qemu_host_page_size;
205 int prot;
206 bool looped = false;
207
208 if (size > reserved_va) {
209 return (abi_ulong)-1;
210 }
211
212 /* Note that start and size have already been aligned by mmap_find_vma. */
213
214 end_addr = start + size;
215 if (start > reserved_va - size) {
216 /* Start at the top of the address space. */
217 end_addr = ((reserved_va - size) & -align) + size;
218 looped = true;
219 }
220
221 /* Search downward from END_ADDR, checking to see if a page is in use. */
222 addr = end_addr;
223 while (1) {
224 addr -= incr;
225 if (addr > end_addr) {
226 if (looped) {
227 /* Failure. The entire address space has been searched. */
228 return (abi_ulong)-1;
229 }
230 /* Re-start at the top of the address space. */
231 addr = end_addr = ((reserved_va - size) & -align) + size;
232 looped = true;
233 } else {
234 prot = page_get_flags(addr);
235 if (prot) {
236 /* Page in use. Restart below this page. */
237 addr = end_addr = ((addr - size) & -align) + size;
238 } else if (addr && addr + size == end_addr) {
239 /* Success! All pages between ADDR and END_ADDR are free. */
240 if (start == mmap_next_start) {
241 mmap_next_start = addr;
242 }
243 return addr;
244 }
245 }
246 }
247 }
248
249 /*
250 * Find and reserve a free memory area of size 'size'. The search
251 * starts at 'start'.
252 * It must be called with mmap_lock() held.
253 * Return -1 if error.
254 */
255 abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
256 {
257 void *ptr, *prev;
258 abi_ulong addr;
259 int wrapped, repeat;
260
261 align = MAX(align, qemu_host_page_size);
262
263 /* If 'start' == 0, then a default start address is used. */
264 if (start == 0) {
265 start = mmap_next_start;
266 } else {
267 start &= qemu_host_page_mask;
268 }
269 start = ROUND_UP(start, align);
270
271 size = HOST_PAGE_ALIGN(size);
272
273 if (reserved_va) {
274 return mmap_find_vma_reserved(start, size, align);
275 }
276
277 addr = start;
278 wrapped = repeat = 0;
279 prev = 0;
280
281 for (;; prev = ptr) {
282 /*
283 * Reserve needed memory area to avoid a race.
284 * It should be discarded using:
285 * - mmap() with MAP_FIXED flag
286 * - mremap() with MREMAP_FIXED flag
287 * - shmat() with SHM_REMAP flag
288 */
289 ptr = mmap(g2h(addr), size, PROT_NONE,
290 MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
291
292 /* ENOMEM, if host address space has no memory */
293 if (ptr == MAP_FAILED) {
294 return (abi_ulong)-1;
295 }
296
297 /* Count the number of sequential returns of the same address.
298 This is used to modify the search algorithm below. */
299 repeat = (ptr == prev ? repeat + 1 : 0);
300
301 if (h2g_valid(ptr + size - 1)) {
302 addr = h2g(ptr);
303
304 if ((addr & (align - 1)) == 0) {
305 /* Success. */
306 if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
307 mmap_next_start = addr + size;
308 }
309 return addr;
310 }
311
312 /* The address is not properly aligned for the target. */
313 switch (repeat) {
314 case 0:
315 /* Assume the result that the kernel gave us is the
316 first with enough free space, so start again at the
317 next higher target page. */
318 addr = ROUND_UP(addr, align);
319 break;
320 case 1:
321 /* Sometimes the kernel decides to perform the allocation
322 at the top end of memory instead. */
323 addr &= -align;
324 break;
325 case 2:
326 /* Start over at low memory. */
327 addr = 0;
328 break;
329 default:
330 /* Fail. This unaligned block must the last. */
331 addr = -1;
332 break;
333 }
334 } else {
335 /* Since the result the kernel gave didn't fit, start
336 again at low memory. If any repetition, fail. */
337 addr = (repeat ? -1 : 0);
338 }
339
340 /* Unmap and try again. */
341 munmap(ptr, size);
342
343 /* ENOMEM if we checked the whole of the target address space. */
344 if (addr == (abi_ulong)-1) {
345 return (abi_ulong)-1;
346 } else if (addr == 0) {
347 if (wrapped) {
348 return (abi_ulong)-1;
349 }
350 wrapped = 1;
351 /* Don't actually use 0 when wrapping, instead indicate
352 that we'd truly like an allocation in low memory. */
353 addr = (mmap_min_addr > TARGET_PAGE_SIZE
354 ? TARGET_PAGE_ALIGN(mmap_min_addr)
355 : TARGET_PAGE_SIZE);
356 } else if (wrapped && addr >= start) {
357 return (abi_ulong)-1;
358 }
359 }
360 }
361
362 /* NOTE: all the constants are the HOST ones */
363 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
364 int flags, int fd, abi_ulong offset)
365 {
366 abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
367
368 mmap_lock();
369 trace_target_mmap(start, len, prot, flags, fd, offset);
370
371 if (!len) {
372 errno = EINVAL;
373 goto fail;
374 }
375
376 /* Also check for overflows... */
377 len = TARGET_PAGE_ALIGN(len);
378 if (!len) {
379 errno = ENOMEM;
380 goto fail;
381 }
382
383 if (offset & ~TARGET_PAGE_MASK) {
384 errno = EINVAL;
385 goto fail;
386 }
387
388 real_start = start & qemu_host_page_mask;
389 host_offset = offset & qemu_host_page_mask;
390
391 /* If the user is asking for the kernel to find a location, do that
392 before we truncate the length for mapping files below. */
393 if (!(flags & MAP_FIXED)) {
394 host_len = len + offset - host_offset;
395 host_len = HOST_PAGE_ALIGN(host_len);
396 start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
397 if (start == (abi_ulong)-1) {
398 errno = ENOMEM;
399 goto fail;
400 }
401 }
402
403 /* When mapping files into a memory area larger than the file, accesses
404 to pages beyond the file size will cause a SIGBUS.
405
406 For example, if mmaping a file of 100 bytes on a host with 4K pages
407 emulating a target with 8K pages, the target expects to be able to
408 access the first 8K. But the host will trap us on any access beyond
409 4K.
410
411 When emulating a target with a larger page-size than the hosts, we
412 may need to truncate file maps at EOF and add extra anonymous pages
413 up to the targets page boundary. */
414
415 if ((qemu_real_host_page_size < qemu_host_page_size) &&
416 !(flags & MAP_ANONYMOUS)) {
417 struct stat sb;
418
419 if (fstat (fd, &sb) == -1)
420 goto fail;
421
422 /* Are we trying to create a map beyond EOF?. */
423 if (offset + len > sb.st_size) {
424 /* If so, truncate the file map at eof aligned with
425 the hosts real pagesize. Additional anonymous maps
426 will be created beyond EOF. */
427 len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
428 }
429 }
430
431 if (!(flags & MAP_FIXED)) {
432 unsigned long host_start;
433 void *p;
434
435 host_len = len + offset - host_offset;
436 host_len = HOST_PAGE_ALIGN(host_len);
437
438 /* Note: we prefer to control the mapping address. It is
439 especially important if qemu_host_page_size >
440 qemu_real_host_page_size */
441 p = mmap(g2h(start), host_len, prot,
442 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
443 if (p == MAP_FAILED)
444 goto fail;
445 /* update start so that it points to the file position at 'offset' */
446 host_start = (unsigned long)p;
447 if (!(flags & MAP_ANONYMOUS)) {
448 p = mmap(g2h(start), len, prot,
449 flags | MAP_FIXED, fd, host_offset);
450 if (p == MAP_FAILED) {
451 munmap(g2h(start), host_len);
452 goto fail;
453 }
454 host_start += offset - host_offset;
455 }
456 start = h2g(host_start);
457 } else {
458 if (start & ~TARGET_PAGE_MASK) {
459 errno = EINVAL;
460 goto fail;
461 }
462 end = start + len;
463 real_end = HOST_PAGE_ALIGN(end);
464
465 /*
466 * Test if requested memory area fits target address space
467 * It can fail only on 64-bit host with 32-bit target.
468 * On any other target/host host mmap() handles this error correctly.
469 */
470 if (!guest_range_valid(start, len)) {
471 errno = ENOMEM;
472 goto fail;
473 }
474
475 /* worst case: we cannot map the file because the offset is not
476 aligned, so we read it */
477 if (!(flags & MAP_ANONYMOUS) &&
478 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
479 /* msync() won't work here, so we return an error if write is
480 possible while it is a shared mapping */
481 if ((flags & MAP_TYPE) == MAP_SHARED &&
482 (prot & PROT_WRITE)) {
483 errno = EINVAL;
484 goto fail;
485 }
486 retaddr = target_mmap(start, len, prot | PROT_WRITE,
487 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
488 -1, 0);
489 if (retaddr == -1)
490 goto fail;
491 if (pread(fd, g2h(start), len, offset) == -1)
492 goto fail;
493 if (!(prot & PROT_WRITE)) {
494 ret = target_mprotect(start, len, prot);
495 assert(ret == 0);
496 }
497 goto the_end;
498 }
499
500 /* handle the start of the mapping */
501 if (start > real_start) {
502 if (real_end == real_start + qemu_host_page_size) {
503 /* one single host page */
504 ret = mmap_frag(real_start, start, end,
505 prot, flags, fd, offset);
506 if (ret == -1)
507 goto fail;
508 goto the_end1;
509 }
510 ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
511 prot, flags, fd, offset);
512 if (ret == -1)
513 goto fail;
514 real_start += qemu_host_page_size;
515 }
516 /* handle the end of the mapping */
517 if (end < real_end) {
518 ret = mmap_frag(real_end - qemu_host_page_size,
519 real_end - qemu_host_page_size, end,
520 prot, flags, fd,
521 offset + real_end - qemu_host_page_size - start);
522 if (ret == -1)
523 goto fail;
524 real_end -= qemu_host_page_size;
525 }
526
527 /* map the middle (easier) */
528 if (real_start < real_end) {
529 void *p;
530 unsigned long offset1;
531 if (flags & MAP_ANONYMOUS)
532 offset1 = 0;
533 else
534 offset1 = offset + real_start - start;
535 p = mmap(g2h(real_start), real_end - real_start,
536 prot, flags, fd, offset1);
537 if (p == MAP_FAILED)
538 goto fail;
539 }
540 }
541 the_end1:
542 page_set_flags(start, start + len, prot | PAGE_VALID);
543 the_end:
544 trace_target_mmap_complete(start);
545 if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
546 log_page_dump(__func__);
547 }
548 tb_invalidate_phys_range(start, start + len);
549 mmap_unlock();
550 return start;
551 fail:
552 mmap_unlock();
553 return -1;
554 }
555
556 static void mmap_reserve(abi_ulong start, abi_ulong size)
557 {
558 abi_ulong real_start;
559 abi_ulong real_end;
560 abi_ulong addr;
561 abi_ulong end;
562 int prot;
563
564 real_start = start & qemu_host_page_mask;
565 real_end = HOST_PAGE_ALIGN(start + size);
566 end = start + size;
567 if (start > real_start) {
568 /* handle host page containing start */
569 prot = 0;
570 for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
571 prot |= page_get_flags(addr);
572 }
573 if (real_end == real_start + qemu_host_page_size) {
574 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
575 prot |= page_get_flags(addr);
576 }
577 end = real_end;
578 }
579 if (prot != 0)
580 real_start += qemu_host_page_size;
581 }
582 if (end < real_end) {
583 prot = 0;
584 for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
585 prot |= page_get_flags(addr);
586 }
587 if (prot != 0)
588 real_end -= qemu_host_page_size;
589 }
590 if (real_start != real_end) {
591 mmap(g2h(real_start), real_end - real_start, PROT_NONE,
592 MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
593 -1, 0);
594 }
595 }
596
597 int target_munmap(abi_ulong start, abi_ulong len)
598 {
599 abi_ulong end, real_start, real_end, addr;
600 int prot, ret;
601
602 trace_target_munmap(start, len);
603
604 if (start & ~TARGET_PAGE_MASK)
605 return -TARGET_EINVAL;
606 len = TARGET_PAGE_ALIGN(len);
607 if (len == 0 || !guest_range_valid(start, len)) {
608 return -TARGET_EINVAL;
609 }
610
611 mmap_lock();
612 end = start + len;
613 real_start = start & qemu_host_page_mask;
614 real_end = HOST_PAGE_ALIGN(end);
615
616 if (start > real_start) {
617 /* handle host page containing start */
618 prot = 0;
619 for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
620 prot |= page_get_flags(addr);
621 }
622 if (real_end == real_start + qemu_host_page_size) {
623 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
624 prot |= page_get_flags(addr);
625 }
626 end = real_end;
627 }
628 if (prot != 0)
629 real_start += qemu_host_page_size;
630 }
631 if (end < real_end) {
632 prot = 0;
633 for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
634 prot |= page_get_flags(addr);
635 }
636 if (prot != 0)
637 real_end -= qemu_host_page_size;
638 }
639
640 ret = 0;
641 /* unmap what we can */
642 if (real_start < real_end) {
643 if (reserved_va) {
644 mmap_reserve(real_start, real_end - real_start);
645 } else {
646 ret = munmap(g2h(real_start), real_end - real_start);
647 }
648 }
649
650 if (ret == 0) {
651 page_set_flags(start, start + len, 0);
652 tb_invalidate_phys_range(start, start + len);
653 }
654 mmap_unlock();
655 return ret;
656 }
657
658 abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
659 abi_ulong new_size, unsigned long flags,
660 abi_ulong new_addr)
661 {
662 int prot;
663 void *host_addr;
664
665 if (!guest_range_valid(old_addr, old_size) ||
666 ((flags & MREMAP_FIXED) &&
667 !guest_range_valid(new_addr, new_size))) {
668 errno = ENOMEM;
669 return -1;
670 }
671
672 mmap_lock();
673
674 if (flags & MREMAP_FIXED) {
675 host_addr = mremap(g2h(old_addr), old_size, new_size,
676 flags, g2h(new_addr));
677
678 if (reserved_va && host_addr != MAP_FAILED) {
679 /* If new and old addresses overlap then the above mremap will
680 already have failed with EINVAL. */
681 mmap_reserve(old_addr, old_size);
682 }
683 } else if (flags & MREMAP_MAYMOVE) {
684 abi_ulong mmap_start;
685
686 mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);
687
688 if (mmap_start == -1) {
689 errno = ENOMEM;
690 host_addr = MAP_FAILED;
691 } else {
692 host_addr = mremap(g2h(old_addr), old_size, new_size,
693 flags | MREMAP_FIXED, g2h(mmap_start));
694 if (reserved_va) {
695 mmap_reserve(old_addr, old_size);
696 }
697 }
698 } else {
699 int prot = 0;
700 if (reserved_va && old_size < new_size) {
701 abi_ulong addr;
702 for (addr = old_addr + old_size;
703 addr < old_addr + new_size;
704 addr++) {
705 prot |= page_get_flags(addr);
706 }
707 }
708 if (prot == 0) {
709 host_addr = mremap(g2h(old_addr), old_size, new_size, flags);
710 if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) {
711 mmap_reserve(old_addr + old_size, new_size - old_size);
712 }
713 } else {
714 errno = ENOMEM;
715 host_addr = MAP_FAILED;
716 }
717 /* Check if address fits target address space */
718 if ((unsigned long)host_addr + new_size > (abi_ulong)-1) {
719 /* Revert mremap() changes */
720 host_addr = mremap(g2h(old_addr), new_size, old_size, flags);
721 errno = ENOMEM;
722 host_addr = MAP_FAILED;
723 }
724 }
725
726 if (host_addr == MAP_FAILED) {
727 new_addr = -1;
728 } else {
729 new_addr = h2g(host_addr);
730 prot = page_get_flags(old_addr);
731 page_set_flags(old_addr, old_addr + old_size, 0);
732 page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID);
733 }
734 tb_invalidate_phys_range(new_addr, new_addr + new_size);
735 mmap_unlock();
736 return new_addr;
737 }