linux-user/mmap.c
/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
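
/*
 * Illustrative usage of the counted lock above: it nests within a
 * single thread, so only the outermost pair touches the mutex:
 *
 *     mmap_lock();              // count 0 -> 1, mutex taken
 *     mmap_lock();              // count 1 -> 2, no syscall
 *     assert(have_mmap_lock()); // true
 *     mmap_unlock();            // count 2 -> 1, still held
 *     mmap_unlock();            // count 1 -> 0, mutex released
 */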

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
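
/*
 * Sketch of the intended call sequence around fork() (the real call
 * sites live in the linux-user fork handling, not here; shown only for
 * illustration):
 *
 *     mmap_fork_start();          // calling thread takes the lock
 *     pid_t pid = fork();
 *     mmap_fork_end(pid == 0);    // child: reinit the mutex, since the
 *                                 // inherited lock state is unusable;
 *                                 // parent: simply unlock
 */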

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
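
/*
 * Worked example (illustrative): a guest request of PROT_READ | PROT_EXEC
 * on a target with no extra prot bits:
 *
 *     int host_prot;
 *     int page_flags = validate_prot_to_pageflags(&host_prot,
 *                                                 PROT_READ | PROT_EXEC);
 *     // host_prot  == PROT_READ   (guest-exec pages only need to be
 *     //                            readable by the host)
 *     // page_flags == PAGE_READ | PAGE_EXEC | PAGE_VALID
 */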

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    tb_invalidate_phys_range(start, start + len);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}
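
/*
 * Illustration of the three-way split in target_mprotect() when target
 * pages are smaller than host pages (guest addresses shown):
 *
 *     host_start     start                  end        host_end
 *         |--- head ---|====== middle =======|--- tail ---|
 *
 * The partially covered head and tail host pages receive the OR of the
 * new protection and the existing flags of the other target pages that
 * share them; only the middle range gets host_prot alone.
 */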

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /*
         * msync() won't work here, so we return an error if writes are
         * possible while it is a shared mapping.
         */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to write */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}
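
/*
 * Example scenario (illustrative): with 4K target pages on a 64K-page
 * host, a private file mapping of guest [0x11000, 0x12000) covers only
 * part of one host page.  mmap_frag() then:
 *   - allocates the host page anonymously if nothing was mapped there,
 *   - temporarily adds PROT_WRITE when the page is not yet writable,
 *   - pread()s the file contents into just [0x11000, 0x12000), and
 *   - sets the combined protection of all target pages in that host page.
 */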

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
 * of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space.  */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use.  */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched.  */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space.  */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page.  */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free.  */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}
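
/*
 * The candidate window above is [end_addr - size, end_addr): ADDR walks
 * downward through it one host page at a time, any in-use page slides
 * the whole window below that page, and an underflow of ADDR wraps the
 * search to the top of the reserved region exactly once before the
 * search fails.
 */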

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = NULL;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success.  */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target.  */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory.  */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last.  */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again.  */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space.  */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
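
/*
 * The PROT_NONE/MAP_NORESERVE probe used above is a standard way to ask
 * the host kernel for a free range without committing memory; a minimal
 * standalone version of the same idea (illustrative sketch, not QEMU
 * code):
 *
 *     void *probe = mmap(NULL, size, PROT_NONE,
 *                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
 *                        -1, 0);
 *     if (probe != MAP_FAILED) {
 *         // keep it by remapping over it with MAP_FIXED,
 *         // or release it with munmap(probe, size)
 *     }
 */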

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if writes are
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start,
                            real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}
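
/*
 * Example call (illustrative), as the syscall layer might issue it for a
 * guest mapping a file read-only:
 *
 *     abi_long addr = target_mmap(0, 8192, PROT_READ,
 *                                 MAP_PRIVATE, fd, 0);
 *     // addr is a guest address; the host-side memory is reached
 *     // via g2h_untagged(addr).
 */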

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}
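
/*
 * With reserved_va enabled, mmap_reserve() above means guest ranges are
 * never truly munmap()ed: they are replaced with PROT_NONE,
 * MAP_NORESERVE mappings, so the host kernel cannot hand the range to
 * an unrelated host allocation while the guest still regards it as part
 * of its reserved address space.
 */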

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}
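
/*
 * As in target_mprotect(), a host page at either edge of the range is
 * released only if no other valid target page still lives in it;
 * otherwise the host page stays mapped and only the target page flags
 * for the unmapped guest range are cleared.
 */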

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    /* Re-reserve the tail freed by shrinking in place. */
                    mmap_reserve(old_addr + new_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}
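
/*
 * Example (illustrative): growing a guest mapping, allowing the kernel
 * to move it if it cannot grow in place:
 *
 *     abi_long new_addr = target_mremap(old_addr, 4096, 16384,
 *                                       MREMAP_MAYMOVE, 0);
 *     // on success the old guest range is invalidated and the new one
 *     // inherits the old page flags plus PAGE_VALID | PAGE_RESET.
 */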

static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /*
     * A straight passthrough may not be safe because qemu sometimes turns
     * private file-backed mappings into anonymous mappings.
     *
     * This is a hint, so ignoring and returning success is ok.
     *
     * This breaks MADV_DONTNEED, which is quite complicated to implement
     * completely.  However, there is one low-hanging fruit: mappings that
     * are known to have the same semantics in the host and the guest.
     * In that case passthrough is safe, so do it.
     */
    mmap_lock();
    if (advice == TARGET_MADV_DONTNEED &&
        can_passthrough_madv_dontneed(start, end)) {
        ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
        if (ret == 0) {
            page_reset_target_data(start, start + len);
        }
    }
    mmap_unlock();

    return ret;
}
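
/*
 * Example (illustrative): forwarding the hint for a passthrough mapping:
 *
 *     target_madvise(start, len, TARGET_MADV_DONTNEED);
 *     // Host madvise() runs only when start and end are host-page
 *     // aligned and every target page in [start, start + len) has
 *     // PAGE_PASSTHROUGH; otherwise the hint is ignored and 0 returned.
 */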