Merge tag 'pull-testing-gdbstub-plugins-gitdm-061022-3' of https://github.com/stsquad...
[qemu.git] / hw / virtio / virtio-mem.c
1 /*
2 * Virtio MEM device
3 *
4 * Copyright (C) 2020 Red Hat, Inc.
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qemu-common.h"
15 #include "qemu/iov.h"
16 #include "qemu/cutils.h"
17 #include "qemu/error-report.h"
18 #include "qemu/units.h"
19 #include "sysemu/numa.h"
20 #include "sysemu/sysemu.h"
21 #include "sysemu/reset.h"
22 #include "hw/virtio/virtio.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "hw/virtio/virtio-access.h"
25 #include "hw/virtio/virtio-mem.h"
26 #include "qapi/error.h"
27 #include "qapi/visitor.h"
28 #include "exec/ram_addr.h"
29 #include "migration/misc.h"
30 #include "hw/boards.h"
31 #include "hw/qdev-properties.h"
32 #include CONFIG_DEVICES
33 #include "trace.h"
34
/*
 * Legacy guests without VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE support only ever
 * existed on x86. No other target has legacy guests.
 */
#if defined(TARGET_I386) || defined(TARGET_X86_64)
#define VIRTIO_MEM_HAS_LEGACY_GUESTS
#endif
42
/*
 * Smallest block size we accept. Blocks below 1 MiB are not allowed, for
 * example, so that the tracking bitmap stays small.
 */
#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
48
49 static uint32_t virtio_mem_default_thp_size(void)
50 {
51 uint32_t default_thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE;
52
53 #if defined(__x86_64__) || defined(__arm__) || defined(__powerpc64__)
54 default_thp_size = 2 * MiB;
55 #elif defined(__aarch64__)
56 if (qemu_real_host_page_size == 4 * KiB) {
57 default_thp_size = 2 * MiB;
58 } else if (qemu_real_host_page_size == 16 * KiB) {
59 default_thp_size = 32 * MiB;
60 } else if (qemu_real_host_page_size == 64 * KiB) {
61 default_thp_size = 512 * MiB;
62 }
63 #endif
64
65 return default_thp_size;
66 }
67
68 /*
69 * We want to have a reasonable default block size such that
70 * 1. We avoid splitting THPs when unplugging memory, which degrades
71 * performance.
72 * 2. We avoid placing THPs for plugged blocks that also cover unplugged
73 * blocks.
74 *
75 * The actual THP size might differ between Linux kernels, so we try to probe
76 * it. In the future (if we ever run into issues regarding 2.), we might want
77 * to disable THP in case we fail to properly probe the THP size, or if the
78 * block size is configured smaller than the THP size.
79 */
80 static uint32_t thp_size;
81
82 #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
83 static uint32_t virtio_mem_thp_size(void)
84 {
85 gchar *content = NULL;
86 const char *endptr;
87 uint64_t tmp;
88
89 if (thp_size) {
90 return thp_size;
91 }
92
93 /*
94 * Try to probe the actual THP size, fallback to (sane but eventually
95 * incorrect) default sizes.
96 */
97 if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
98 !qemu_strtou64(content, &endptr, 0, &tmp) &&
99 (!endptr || *endptr == '\n')) {
100 /* Sanity-check the value and fallback to something reasonable. */
101 if (!tmp || !is_power_of_2(tmp)) {
102 warn_report("Read unsupported THP size: %" PRIx64, tmp);
103 } else {
104 thp_size = tmp;
105 }
106 }
107
108 if (!thp_size) {
109 thp_size = virtio_mem_default_thp_size();
110 warn_report("Could not detect THP size, falling back to %" PRIx64
111 " MiB.", thp_size / MiB);
112 }
113
114 g_free(content);
115 return thp_size;
116 }
117
118 static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
119 {
120 const uint64_t page_size = qemu_ram_pagesize(rb);
121
122 /* We can have hugetlbfs with a page size smaller than the THP size. */
123 if (page_size == qemu_real_host_page_size) {
124 return MAX(page_size, virtio_mem_thp_size());
125 }
126 return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
127 }
128
#if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
/*
 * Tell whether reading unplugged memory of @rb is backed by the shared
 * zeropage.
 *
 * That is only guaranteed for ordinary MAP_PRIVATE anonymous RAM. In any
 * other case, reading unplugged memory *can* populate a fresh page and
 * consume actual memory.
 */
static bool virtio_mem_has_shared_zeropage(RAMBlock *rb)
{
    if (qemu_ram_is_shared(rb)) {
        return false;
    }
    if (rb->fd >= 0) {
        return false;
    }
    return qemu_ram_pagesize(rb) == qemu_real_host_page_size;
}
#endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
141
142 /*
143 * Size the usable region bigger than the requested size if possible. Esp.
144 * Linux guests will only add (aligned) memory blocks in case they fully
145 * fit into the usable region, but plug+online only a subset of the pages.
146 * The memory block size corresponds mostly to the section size.
147 *
148 * This allows e.g., to add 20MB with a section size of 128MB on x86_64, and
149 * a section size of 512MB on arm64 (as long as the start address is properly
150 * aligned, similar to ordinary DIMMs).
151 *
152 * We can change this at any time and maybe even make it configurable if
153 * necessary (as the section size can change). But it's more likely that the
154 * section size will rather get smaller and not bigger over time.
155 */
156 #if defined(TARGET_X86_64) || defined(TARGET_I386)
157 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
158 #elif defined(TARGET_ARM)
159 #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB))
160 #else
161 #error VIRTIO_MEM_USABLE_EXTENT not defined
162 #endif
163
/*
 * Report whether plug/unplug requests currently have to be refused because
 * of migration.
 *
 * Postcopy cannot handle concurrent discards, and pages with stale content
 * must not be migrated on demand when new blocks get plugged.
 *
 * With precopy, unplugged blocks must not end up in the migration stream.
 * Also, when plugging new blocks, page content might differ between source
 * and destination (observable by a guest that does not initialize pages
 * after plugging them) until we run on the destination, because these blocks
 * were not migrated while unplugged.
 */
static bool virtio_mem_is_busy(void)
{
    if (migration_in_incoming_postcopy()) {
        return true;
    }
    return !migration_is_idle();
}
178
179 typedef int (*virtio_mem_range_cb)(const VirtIOMEM *vmem, void *arg,
180 uint64_t offset, uint64_t size);
181
182 static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
183 virtio_mem_range_cb cb)
184 {
185 unsigned long first_zero_bit, last_zero_bit;
186 uint64_t offset, size;
187 int ret = 0;
188
189 first_zero_bit = find_first_zero_bit(vmem->bitmap, vmem->bitmap_size);
190 while (first_zero_bit < vmem->bitmap_size) {
191 offset = first_zero_bit * vmem->block_size;
192 last_zero_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
193 first_zero_bit + 1) - 1;
194 size = (last_zero_bit - first_zero_bit + 1) * vmem->block_size;
195
196 ret = cb(vmem, arg, offset, size);
197 if (ret) {
198 break;
199 }
200 first_zero_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
201 last_zero_bit + 2);
202 }
203 return ret;
204 }
205
206 /*
207 * Adjust the memory section to cover the intersection with the given range.
208 *
209 * Returns false if the intersection is empty, otherwise returns true.
210 */
211 static bool virito_mem_intersect_memory_section(MemoryRegionSection *s,
212 uint64_t offset, uint64_t size)
213 {
214 uint64_t start = MAX(s->offset_within_region, offset);
215 uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
216 offset + size);
217
218 if (end <= start) {
219 return false;
220 }
221
222 s->offset_within_address_space += start - s->offset_within_region;
223 s->offset_within_region = start;
224 s->size = int128_make64(end - start);
225 return true;
226 }
227
228 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
229
230 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
231 MemoryRegionSection *s,
232 void *arg,
233 virtio_mem_section_cb cb)
234 {
235 unsigned long first_bit, last_bit;
236 uint64_t offset, size;
237 int ret = 0;
238
239 first_bit = s->offset_within_region / vmem->bitmap_size;
240 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
241 while (first_bit < vmem->bitmap_size) {
242 MemoryRegionSection tmp = *s;
243
244 offset = first_bit * vmem->block_size;
245 last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
246 first_bit + 1) - 1;
247 size = (last_bit - first_bit + 1) * vmem->block_size;
248
249 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
250 break;
251 }
252 ret = cb(&tmp, arg);
253 if (ret) {
254 break;
255 }
256 first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
257 last_bit + 2);
258 }
259 return ret;
260 }
261
262 static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
263 MemoryRegionSection *s,
264 void *arg,
265 virtio_mem_section_cb cb)
266 {
267 unsigned long first_bit, last_bit;
268 uint64_t offset, size;
269 int ret = 0;
270
271 first_bit = s->offset_within_region / vmem->bitmap_size;
272 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size, first_bit);
273 while (first_bit < vmem->bitmap_size) {
274 MemoryRegionSection tmp = *s;
275
276 offset = first_bit * vmem->block_size;
277 last_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
278 first_bit + 1) - 1;
279 size = (last_bit - first_bit + 1) * vmem->block_size;
280
281 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
282 break;
283 }
284 ret = cb(&tmp, arg);
285 if (ret) {
286 break;
287 }
288 first_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
289 last_bit + 2);
290 }
291 return ret;
292 }
293
294 static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
295 {
296 RamDiscardListener *rdl = arg;
297
298 return rdl->notify_populate(rdl, s);
299 }
300
301 static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
302 {
303 RamDiscardListener *rdl = arg;
304
305 rdl->notify_discard(rdl, s);
306 return 0;
307 }
308
309 static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
310 uint64_t size)
311 {
312 RamDiscardListener *rdl;
313
314 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
315 MemoryRegionSection tmp = *rdl->section;
316
317 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
318 continue;
319 }
320 rdl->notify_discard(rdl, &tmp);
321 }
322 }
323
324 static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
325 uint64_t size)
326 {
327 RamDiscardListener *rdl, *rdl2;
328 int ret = 0;
329
330 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
331 MemoryRegionSection tmp = *rdl->section;
332
333 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
334 continue;
335 }
336 ret = rdl->notify_populate(rdl, &tmp);
337 if (ret) {
338 break;
339 }
340 }
341
342 if (ret) {
343 /* Notify all already-notified listeners. */
344 QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
345 MemoryRegionSection tmp = *rdl->section;
346
347 if (rdl2 == rdl) {
348 break;
349 }
350 if (!virito_mem_intersect_memory_section(&tmp, offset, size)) {
351 continue;
352 }
353 rdl2->notify_discard(rdl2, &tmp);
354 }
355 }
356 return ret;
357 }
358
359 static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
360 {
361 RamDiscardListener *rdl;
362
363 if (!vmem->size) {
364 return;
365 }
366
367 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
368 if (rdl->double_discard_supported) {
369 rdl->notify_discard(rdl, rdl->section);
370 } else {
371 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
372 virtio_mem_notify_discard_cb);
373 }
374 }
375 }
376
377 static bool virtio_mem_test_bitmap(const VirtIOMEM *vmem, uint64_t start_gpa,
378 uint64_t size, bool plugged)
379 {
380 const unsigned long first_bit = (start_gpa - vmem->addr) / vmem->block_size;
381 const unsigned long last_bit = first_bit + (size / vmem->block_size) - 1;
382 unsigned long found_bit;
383
384 /* We fake a shorter bitmap to avoid searching too far. */
385 if (plugged) {
386 found_bit = find_next_zero_bit(vmem->bitmap, last_bit + 1, first_bit);
387 } else {
388 found_bit = find_next_bit(vmem->bitmap, last_bit + 1, first_bit);
389 }
390 return found_bit > last_bit;
391 }
392
393 static void virtio_mem_set_bitmap(VirtIOMEM *vmem, uint64_t start_gpa,
394 uint64_t size, bool plugged)
395 {
396 const unsigned long bit = (start_gpa - vmem->addr) / vmem->block_size;
397 const unsigned long nbits = size / vmem->block_size;
398
399 if (plugged) {
400 bitmap_set(vmem->bitmap, bit, nbits);
401 } else {
402 bitmap_clear(vmem->bitmap, bit, nbits);
403 }
404 }
405
406 static void virtio_mem_send_response(VirtIOMEM *vmem, VirtQueueElement *elem,
407 struct virtio_mem_resp *resp)
408 {
409 VirtIODevice *vdev = VIRTIO_DEVICE(vmem);
410 VirtQueue *vq = vmem->vq;
411
412 trace_virtio_mem_send_response(le16_to_cpu(resp->type));
413 iov_from_buf(elem->in_sg, elem->in_num, 0, resp, sizeof(*resp));
414
415 virtqueue_push(vq, elem, sizeof(*resp));
416 virtio_notify(vdev, vq);
417 }
418
419 static void virtio_mem_send_response_simple(VirtIOMEM *vmem,
420 VirtQueueElement *elem,
421 uint16_t type)
422 {
423 struct virtio_mem_resp resp = {
424 .type = cpu_to_le16(type),
425 };
426
427 virtio_mem_send_response(vmem, elem, &resp);
428 }
429
430 static bool virtio_mem_valid_range(const VirtIOMEM *vmem, uint64_t gpa,
431 uint64_t size)
432 {
433 if (!QEMU_IS_ALIGNED(gpa, vmem->block_size)) {
434 return false;
435 }
436 if (gpa + size < gpa || !size) {
437 return false;
438 }
439 if (gpa < vmem->addr || gpa >= vmem->addr + vmem->usable_region_size) {
440 return false;
441 }
442 if (gpa + size > vmem->addr + vmem->usable_region_size) {
443 return false;
444 }
445 return true;
446 }
447
448 static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa,
449 uint64_t size, bool plug)
450 {
451 const uint64_t offset = start_gpa - vmem->addr;
452 RAMBlock *rb = vmem->memdev->mr.ram_block;
453
454 if (virtio_mem_is_busy()) {
455 return -EBUSY;
456 }
457
458 if (!plug) {
459 if (ram_block_discard_range(rb, offset, size)) {
460 return -EBUSY;
461 }
462 virtio_mem_notify_unplug(vmem, offset, size);
463 } else {
464 int ret = 0;
465
466 if (vmem->prealloc) {
467 void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
468 int fd = memory_region_get_fd(&vmem->memdev->mr);
469 Error *local_err = NULL;
470
471 os_mem_prealloc(fd, area, size, 1, &local_err);
472 if (local_err) {
473 static bool warned;
474
475 /*
476 * Warn only once, we don't want to fill the log with these
477 * warnings.
478 */
479 if (!warned) {
480 warn_report_err(local_err);
481 warned = true;
482 } else {
483 error_free(local_err);
484 }
485 ret = -EBUSY;
486 }
487 }
488 if (!ret) {
489 ret = virtio_mem_notify_plug(vmem, offset, size);
490 }
491
492 if (ret) {
493 /* Could be preallocation or a notifier populated memory. */
494 ram_block_discard_range(vmem->memdev->mr.ram_block, offset, size);
495 return -EBUSY;
496 }
497 }
498 virtio_mem_set_bitmap(vmem, start_gpa, size, plug);
499 return 0;
500 }
501
502 static int virtio_mem_state_change_request(VirtIOMEM *vmem, uint64_t gpa,
503 uint16_t nb_blocks, bool plug)
504 {
505 const uint64_t size = nb_blocks * vmem->block_size;
506 int ret;
507
508 if (!virtio_mem_valid_range(vmem, gpa, size)) {
509 return VIRTIO_MEM_RESP_ERROR;
510 }
511
512 if (plug && (vmem->size + size > vmem->requested_size)) {
513 return VIRTIO_MEM_RESP_NACK;
514 }
515
516 /* test if really all blocks are in the opposite state */
517 if (!virtio_mem_test_bitmap(vmem, gpa, size, !plug)) {
518 return VIRTIO_MEM_RESP_ERROR;
519 }
520
521 ret = virtio_mem_set_block_state(vmem, gpa, size, plug);
522 if (ret) {
523 return VIRTIO_MEM_RESP_BUSY;
524 }
525 if (plug) {
526 vmem->size += size;
527 } else {
528 vmem->size -= size;
529 }
530 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
531 return VIRTIO_MEM_RESP_ACK;
532 }
533
534 static void virtio_mem_plug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
535 struct virtio_mem_req *req)
536 {
537 const uint64_t gpa = le64_to_cpu(req->u.plug.addr);
538 const uint16_t nb_blocks = le16_to_cpu(req->u.plug.nb_blocks);
539 uint16_t type;
540
541 trace_virtio_mem_plug_request(gpa, nb_blocks);
542 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, true);
543 virtio_mem_send_response_simple(vmem, elem, type);
544 }
545
546 static void virtio_mem_unplug_request(VirtIOMEM *vmem, VirtQueueElement *elem,
547 struct virtio_mem_req *req)
548 {
549 const uint64_t gpa = le64_to_cpu(req->u.unplug.addr);
550 const uint16_t nb_blocks = le16_to_cpu(req->u.unplug.nb_blocks);
551 uint16_t type;
552
553 trace_virtio_mem_unplug_request(gpa, nb_blocks);
554 type = virtio_mem_state_change_request(vmem, gpa, nb_blocks, false);
555 virtio_mem_send_response_simple(vmem, elem, type);
556 }
557
558 static void virtio_mem_resize_usable_region(VirtIOMEM *vmem,
559 uint64_t requested_size,
560 bool can_shrink)
561 {
562 uint64_t newsize = MIN(memory_region_size(&vmem->memdev->mr),
563 requested_size + VIRTIO_MEM_USABLE_EXTENT);
564
565 /* The usable region size always has to be multiples of the block size. */
566 newsize = QEMU_ALIGN_UP(newsize, vmem->block_size);
567
568 if (!requested_size) {
569 newsize = 0;
570 }
571
572 if (newsize < vmem->usable_region_size && !can_shrink) {
573 return;
574 }
575
576 trace_virtio_mem_resized_usable_region(vmem->usable_region_size, newsize);
577 vmem->usable_region_size = newsize;
578 }
579
580 static int virtio_mem_unplug_all(VirtIOMEM *vmem)
581 {
582 RAMBlock *rb = vmem->memdev->mr.ram_block;
583
584 if (virtio_mem_is_busy()) {
585 return -EBUSY;
586 }
587
588 if (ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb))) {
589 return -EBUSY;
590 }
591 virtio_mem_notify_unplug_all(vmem);
592
593 bitmap_clear(vmem->bitmap, 0, vmem->bitmap_size);
594 if (vmem->size) {
595 vmem->size = 0;
596 notifier_list_notify(&vmem->size_change_notifiers, &vmem->size);
597 }
598 trace_virtio_mem_unplugged_all();
599 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
600 return 0;
601 }
602
603 static void virtio_mem_unplug_all_request(VirtIOMEM *vmem,
604 VirtQueueElement *elem)
605 {
606 trace_virtio_mem_unplug_all_request();
607 if (virtio_mem_unplug_all(vmem)) {
608 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_BUSY);
609 } else {
610 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ACK);
611 }
612 }
613
614 static void virtio_mem_state_request(VirtIOMEM *vmem, VirtQueueElement *elem,
615 struct virtio_mem_req *req)
616 {
617 const uint16_t nb_blocks = le16_to_cpu(req->u.state.nb_blocks);
618 const uint64_t gpa = le64_to_cpu(req->u.state.addr);
619 const uint64_t size = nb_blocks * vmem->block_size;
620 struct virtio_mem_resp resp = {
621 .type = cpu_to_le16(VIRTIO_MEM_RESP_ACK),
622 };
623
624 trace_virtio_mem_state_request(gpa, nb_blocks);
625 if (!virtio_mem_valid_range(vmem, gpa, size)) {
626 virtio_mem_send_response_simple(vmem, elem, VIRTIO_MEM_RESP_ERROR);
627 return;
628 }
629
630 if (virtio_mem_test_bitmap(vmem, gpa, size, true)) {
631 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_PLUGGED);
632 } else if (virtio_mem_test_bitmap(vmem, gpa, size, false)) {
633 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_UNPLUGGED);
634 } else {
635 resp.u.state.state = cpu_to_le16(VIRTIO_MEM_STATE_MIXED);
636 }
637 trace_virtio_mem_state_response(le16_to_cpu(resp.u.state.state));
638 virtio_mem_send_response(vmem, elem, &resp);
639 }
640
641 static void virtio_mem_handle_request(VirtIODevice *vdev, VirtQueue *vq)
642 {
643 const int len = sizeof(struct virtio_mem_req);
644 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
645 VirtQueueElement *elem;
646 struct virtio_mem_req req;
647 uint16_t type;
648
649 while (true) {
650 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
651 if (!elem) {
652 return;
653 }
654
655 if (iov_to_buf(elem->out_sg, elem->out_num, 0, &req, len) < len) {
656 virtio_error(vdev, "virtio-mem protocol violation: invalid request"
657 " size: %d", len);
658 virtqueue_detach_element(vq, elem, 0);
659 g_free(elem);
660 return;
661 }
662
663 if (iov_size(elem->in_sg, elem->in_num) <
664 sizeof(struct virtio_mem_resp)) {
665 virtio_error(vdev, "virtio-mem protocol violation: not enough space"
666 " for response: %zu",
667 iov_size(elem->in_sg, elem->in_num));
668 virtqueue_detach_element(vq, elem, 0);
669 g_free(elem);
670 return;
671 }
672
673 type = le16_to_cpu(req.type);
674 switch (type) {
675 case VIRTIO_MEM_REQ_PLUG:
676 virtio_mem_plug_request(vmem, elem, &req);
677 break;
678 case VIRTIO_MEM_REQ_UNPLUG:
679 virtio_mem_unplug_request(vmem, elem, &req);
680 break;
681 case VIRTIO_MEM_REQ_UNPLUG_ALL:
682 virtio_mem_unplug_all_request(vmem, elem);
683 break;
684 case VIRTIO_MEM_REQ_STATE:
685 virtio_mem_state_request(vmem, elem, &req);
686 break;
687 default:
688 virtio_error(vdev, "virtio-mem protocol violation: unknown request"
689 " type: %d", type);
690 virtqueue_detach_element(vq, elem, 0);
691 g_free(elem);
692 return;
693 }
694
695 g_free(elem);
696 }
697 }
698
699 static void virtio_mem_get_config(VirtIODevice *vdev, uint8_t *config_data)
700 {
701 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
702 struct virtio_mem_config *config = (void *) config_data;
703
704 config->block_size = cpu_to_le64(vmem->block_size);
705 config->node_id = cpu_to_le16(vmem->node);
706 config->requested_size = cpu_to_le64(vmem->requested_size);
707 config->plugged_size = cpu_to_le64(vmem->size);
708 config->addr = cpu_to_le64(vmem->addr);
709 config->region_size = cpu_to_le64(memory_region_size(&vmem->memdev->mr));
710 config->usable_region_size = cpu_to_le64(vmem->usable_region_size);
711 }
712
713 static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features,
714 Error **errp)
715 {
716 MachineState *ms = MACHINE(qdev_get_machine());
717 VirtIOMEM *vmem = VIRTIO_MEM(vdev);
718
719 if (ms->numa_state) {
720 #if defined(CONFIG_ACPI)
721 virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM);
722 #endif
723 }
724 assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO);
725 if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) {
726 virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
727 }
728 return features;
729 }
730
731 static int virtio_mem_validate_features(VirtIODevice *vdev)
732 {
733 if (virtio_host_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE) &&
734 !virtio_vdev_has_feature(vdev, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) {
735 return -EFAULT;
736 }
737 return 0;
738 }
739
/*
 * System reset handler; @opaque is the VirtIOMEM device.
 *
 * On usual resets all memory gets unplugged and the usable region shrinks.
 * That is not possible in all scenarios, though; the result is ignored here
 * and the guest then has to deal with it manually
 * (VIRTIO_MEM_REQ_UNPLUG_ALL).
 */
static void virtio_mem_system_reset(void *opaque)
{
    virtio_mem_unplug_all(VIRTIO_MEM(opaque));
}
751
752 static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
753 {
754 MachineState *ms = MACHINE(qdev_get_machine());
755 int nb_numa_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
756 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
757 VirtIOMEM *vmem = VIRTIO_MEM(dev);
758 uint64_t page_size;
759 RAMBlock *rb;
760 int ret;
761
762 if (!vmem->memdev) {
763 error_setg(errp, "'%s' property is not set", VIRTIO_MEM_MEMDEV_PROP);
764 return;
765 } else if (host_memory_backend_is_mapped(vmem->memdev)) {
766 error_setg(errp, "'%s' property specifies a busy memdev: %s",
767 VIRTIO_MEM_MEMDEV_PROP,
768 object_get_canonical_path_component(OBJECT(vmem->memdev)));
769 return;
770 } else if (!memory_region_is_ram(&vmem->memdev->mr) ||
771 memory_region_is_rom(&vmem->memdev->mr) ||
772 !vmem->memdev->mr.ram_block) {
773 error_setg(errp, "'%s' property specifies an unsupported memdev",
774 VIRTIO_MEM_MEMDEV_PROP);
775 return;
776 }
777
778 if ((nb_numa_nodes && vmem->node >= nb_numa_nodes) ||
779 (!nb_numa_nodes && vmem->node)) {
780 error_setg(errp, "'%s' property has value '%" PRIu32 "', which exceeds"
781 "the number of numa nodes: %d", VIRTIO_MEM_NODE_PROP,
782 vmem->node, nb_numa_nodes ? nb_numa_nodes : 1);
783 return;
784 }
785
786 if (enable_mlock) {
787 error_setg(errp, "Incompatible with mlock");
788 return;
789 }
790
791 rb = vmem->memdev->mr.ram_block;
792 page_size = qemu_ram_pagesize(rb);
793
794 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
795 switch (vmem->unplugged_inaccessible) {
796 case ON_OFF_AUTO_AUTO:
797 if (virtio_mem_has_shared_zeropage(rb)) {
798 vmem->unplugged_inaccessible = ON_OFF_AUTO_OFF;
799 } else {
800 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
801 }
802 break;
803 case ON_OFF_AUTO_OFF:
804 if (!virtio_mem_has_shared_zeropage(rb)) {
805 warn_report("'%s' property set to 'off' with a memdev that does"
806 " not support the shared zeropage.",
807 VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP);
808 }
809 break;
810 default:
811 break;
812 }
813 #else /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
814 vmem->unplugged_inaccessible = ON_OFF_AUTO_ON;
815 #endif /* VIRTIO_MEM_HAS_LEGACY_GUESTS */
816
817 /*
818 * If the block size wasn't configured by the user, use a sane default. This
819 * allows using hugetlbfs backends of any page size without manual
820 * intervention.
821 */
822 if (!vmem->block_size) {
823 vmem->block_size = virtio_mem_default_block_size(rb);
824 }
825
826 if (vmem->block_size < page_size) {
827 error_setg(errp, "'%s' property has to be at least the page size (0x%"
828 PRIx64 ")", VIRTIO_MEM_BLOCK_SIZE_PROP, page_size);
829 return;
830 } else if (vmem->block_size < virtio_mem_default_block_size(rb)) {
831 warn_report("'%s' property is smaller than the default block size (%"
832 PRIx64 " MiB)", VIRTIO_MEM_BLOCK_SIZE_PROP,
833 virtio_mem_default_block_size(rb) / MiB);
834 }
835 if (!QEMU_IS_ALIGNED(vmem->requested_size, vmem->block_size)) {
836 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
837 ")", VIRTIO_MEM_REQUESTED_SIZE_PROP,
838 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
839 return;
840 } else if (!QEMU_IS_ALIGNED(vmem->addr, vmem->block_size)) {
841 error_setg(errp, "'%s' property has to be multiples of '%s' (0x%" PRIx64
842 ")", VIRTIO_MEM_ADDR_PROP, VIRTIO_MEM_BLOCK_SIZE_PROP,
843 vmem->block_size);
844 return;
845 } else if (!QEMU_IS_ALIGNED(memory_region_size(&vmem->memdev->mr),
846 vmem->block_size)) {
847 error_setg(errp, "'%s' property memdev size has to be multiples of"
848 "'%s' (0x%" PRIx64 ")", VIRTIO_MEM_MEMDEV_PROP,
849 VIRTIO_MEM_BLOCK_SIZE_PROP, vmem->block_size);
850 return;
851 }
852
853 if (ram_block_coordinated_discard_require(true)) {
854 error_setg(errp, "Discarding RAM is disabled");
855 return;
856 }
857
858 ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
859 if (ret) {
860 error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
861 ram_block_coordinated_discard_require(false);
862 return;
863 }
864
865 virtio_mem_resize_usable_region(vmem, vmem->requested_size, true);
866
867 vmem->bitmap_size = memory_region_size(&vmem->memdev->mr) /
868 vmem->block_size;
869 vmem->bitmap = bitmap_new(vmem->bitmap_size);
870
871 virtio_init(vdev, TYPE_VIRTIO_MEM, VIRTIO_ID_MEM,
872 sizeof(struct virtio_mem_config));
873 vmem->vq = virtio_add_queue(vdev, 128, virtio_mem_handle_request);
874
875 host_memory_backend_set_mapped(vmem->memdev, true);
876 vmstate_register_ram(&vmem->memdev->mr, DEVICE(vmem));
877 qemu_register_reset(virtio_mem_system_reset, vmem);
878
879 /*
880 * Set ourselves as RamDiscardManager before the plug handler maps the
881 * memory region and exposes it via an address space.
882 */
883 memory_region_set_ram_discard_manager(&vmem->memdev->mr,
884 RAM_DISCARD_MANAGER(vmem));
885 }
886
887 static void virtio_mem_device_unrealize(DeviceState *dev)
888 {
889 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
890 VirtIOMEM *vmem = VIRTIO_MEM(dev);
891
892 /*
893 * The unplug handler unmapped the memory region, it cannot be
894 * found via an address space anymore. Unset ourselves.
895 */
896 memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
897 qemu_unregister_reset(virtio_mem_system_reset, vmem);
898 vmstate_unregister_ram(&vmem->memdev->mr, DEVICE(vmem));
899 host_memory_backend_set_mapped(vmem->memdev, false);
900 virtio_del_queue(vdev, 0);
901 virtio_cleanup(vdev);
902 g_free(vmem->bitmap);
903 ram_block_coordinated_discard_require(false);
904 }
905
906 static int virtio_mem_discard_range_cb(const VirtIOMEM *vmem, void *arg,
907 uint64_t offset, uint64_t size)
908 {
909 RAMBlock *rb = vmem->memdev->mr.ram_block;
910
911 return ram_block_discard_range(rb, offset, size) ? -EINVAL : 0;
912 }
913
914 static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
915 {
916 /* Make sure all memory is really discarded after migration. */
917 return virtio_mem_for_each_unplugged_range(vmem, NULL,
918 virtio_mem_discard_range_cb);
919 }
920
921 static int virtio_mem_post_load(void *opaque, int version_id)
922 {
923 VirtIOMEM *vmem = VIRTIO_MEM(opaque);
924 RamDiscardListener *rdl;
925 int ret;
926
927 /*
928 * We started out with all memory discarded and our memory region is mapped
929 * into an address space. Replay, now that we updated the bitmap.
930 */
931 QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
932 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
933 virtio_mem_notify_populate_cb);
934 if (ret) {
935 return ret;
936 }
937 }
938
939 if (migration_in_incoming_postcopy()) {
940 return 0;
941 }
942
943 return virtio_mem_restore_unplugged(vmem);
944 }
945
946 typedef struct VirtIOMEMMigSanityChecks {
947 VirtIOMEM *parent;
948 uint64_t addr;
949 uint64_t region_size;
950 uint64_t block_size;
951 uint32_t node;
952 } VirtIOMEMMigSanityChecks;
953
954 static int virtio_mem_mig_sanity_checks_pre_save(void *opaque)
955 {
956 VirtIOMEMMigSanityChecks *tmp = opaque;
957 VirtIOMEM *vmem = tmp->parent;
958
959 tmp->addr = vmem->addr;
960 tmp->region_size = memory_region_size(&vmem->memdev->mr);
961 tmp->block_size = vmem->block_size;
962 tmp->node = vmem->node;
963 return 0;
964 }
965
966 static int virtio_mem_mig_sanity_checks_post_load(void *opaque, int version_id)
967 {
968 VirtIOMEMMigSanityChecks *tmp = opaque;
969 VirtIOMEM *vmem = tmp->parent;
970 const uint64_t new_region_size = memory_region_size(&vmem->memdev->mr);
971
972 if (tmp->addr != vmem->addr) {
973 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
974 VIRTIO_MEM_ADDR_PROP, tmp->addr, vmem->addr);
975 return -EINVAL;
976 }
977 /*
978 * Note: Preparation for resizeable memory regions. The maximum size
979 * of the memory region must not change during migration.
980 */
981 if (tmp->region_size != new_region_size) {
982 error_report("Property '%s' size changed from 0x%" PRIx64 " to 0x%"
983 PRIx64, VIRTIO_MEM_MEMDEV_PROP, tmp->region_size,
984 new_region_size);
985 return -EINVAL;
986 }
987 if (tmp->block_size != vmem->block_size) {
988 error_report("Property '%s' changed from 0x%" PRIx64 " to 0x%" PRIx64,
989 VIRTIO_MEM_BLOCK_SIZE_PROP, tmp->block_size,
990 vmem->block_size);
991 return -EINVAL;
992 }
993 if (tmp->node != vmem->node) {
994 error_report("Property '%s' changed from %" PRIu32 " to %" PRIu32,
995 VIRTIO_MEM_NODE_PROP, tmp->node, vmem->node);
996 return -EINVAL;
997 }
998 return 0;
999 }
1000
1001 static const VMStateDescription vmstate_virtio_mem_sanity_checks = {
1002 .name = "virtio-mem-device/sanity-checks",
1003 .pre_save = virtio_mem_mig_sanity_checks_pre_save,
1004 .post_load = virtio_mem_mig_sanity_checks_post_load,
1005 .fields = (VMStateField[]) {
1006 VMSTATE_UINT64(addr, VirtIOMEMMigSanityChecks),
1007 VMSTATE_UINT64(region_size, VirtIOMEMMigSanityChecks),
1008 VMSTATE_UINT64(block_size, VirtIOMEMMigSanityChecks),
1009 VMSTATE_UINT32(node, VirtIOMEMMigSanityChecks),
1010 VMSTATE_END_OF_LIST(),
1011 },
1012 };
1013
1014 static const VMStateDescription vmstate_virtio_mem_device = {
1015 .name = "virtio-mem-device",
1016 .minimum_version_id = 1,
1017 .version_id = 1,
1018 .priority = MIG_PRI_VIRTIO_MEM,
1019 .post_load = virtio_mem_post_load,
1020 .fields = (VMStateField[]) {
1021 VMSTATE_WITH_TMP(VirtIOMEM, VirtIOMEMMigSanityChecks,
1022 vmstate_virtio_mem_sanity_checks),
1023 VMSTATE_UINT64(usable_region_size, VirtIOMEM),
1024 VMSTATE_UINT64(size, VirtIOMEM),
1025 VMSTATE_UINT64(requested_size, VirtIOMEM),
1026 VMSTATE_BITMAP(bitmap, VirtIOMEM, 0, bitmap_size),
1027 VMSTATE_END_OF_LIST()
1028 },
1029 };
1030
1031 static const VMStateDescription vmstate_virtio_mem = {
1032 .name = "virtio-mem",
1033 .minimum_version_id = 1,
1034 .version_id = 1,
1035 .fields = (VMStateField[]) {
1036 VMSTATE_VIRTIO_DEVICE,
1037 VMSTATE_END_OF_LIST()
1038 },
1039 };
1040
1041 static void virtio_mem_fill_device_info(const VirtIOMEM *vmem,
1042 VirtioMEMDeviceInfo *vi)
1043 {
1044 vi->memaddr = vmem->addr;
1045 vi->node = vmem->node;
1046 vi->requested_size = vmem->requested_size;
1047 vi->size = vmem->size;
1048 vi->max_size = memory_region_size(&vmem->memdev->mr);
1049 vi->block_size = vmem->block_size;
1050 vi->memdev = object_get_canonical_path(OBJECT(vmem->memdev));
1051 }
1052
1053 static MemoryRegion *virtio_mem_get_memory_region(VirtIOMEM *vmem, Error **errp)
1054 {
1055 if (!vmem->memdev) {
1056 error_setg(errp, "'%s' property must be set", VIRTIO_MEM_MEMDEV_PROP);
1057 return NULL;
1058 }
1059
1060 return &vmem->memdev->mr;
1061 }
1062
1063 static void virtio_mem_add_size_change_notifier(VirtIOMEM *vmem,
1064 Notifier *notifier)
1065 {
1066 notifier_list_add(&vmem->size_change_notifiers, notifier);
1067 }
1068
1069 static void virtio_mem_remove_size_change_notifier(VirtIOMEM *vmem,
1070 Notifier *notifier)
1071 {
1072 notifier_remove(notifier);
1073 }
1074
1075 static void virtio_mem_get_size(Object *obj, Visitor *v, const char *name,
1076 void *opaque, Error **errp)
1077 {
1078 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1079 uint64_t value = vmem->size;
1080
1081 visit_type_size(v, name, &value, errp);
1082 }
1083
1084 static void virtio_mem_get_requested_size(Object *obj, Visitor *v,
1085 const char *name, void *opaque,
1086 Error **errp)
1087 {
1088 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1089 uint64_t value = vmem->requested_size;
1090
1091 visit_type_size(v, name, &value, errp);
1092 }
1093
1094 static void virtio_mem_set_requested_size(Object *obj, Visitor *v,
1095 const char *name, void *opaque,
1096 Error **errp)
1097 {
1098 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1099 Error *err = NULL;
1100 uint64_t value;
1101
1102 visit_type_size(v, name, &value, &err);
1103 if (err) {
1104 error_propagate(errp, err);
1105 return;
1106 }
1107
1108 /*
1109 * The block size and memory backend are not fixed until the device was
1110 * realized. realize() will verify these properties then.
1111 */
1112 if (DEVICE(obj)->realized) {
1113 if (!QEMU_IS_ALIGNED(value, vmem->block_size)) {
1114 error_setg(errp, "'%s' has to be multiples of '%s' (0x%" PRIx64
1115 ")", name, VIRTIO_MEM_BLOCK_SIZE_PROP,
1116 vmem->block_size);
1117 return;
1118 } else if (value > memory_region_size(&vmem->memdev->mr)) {
1119 error_setg(errp, "'%s' cannot exceed the memory backend size"
1120 "(0x%" PRIx64 ")", name,
1121 memory_region_size(&vmem->memdev->mr));
1122 return;
1123 }
1124
1125 if (value != vmem->requested_size) {
1126 virtio_mem_resize_usable_region(vmem, value, false);
1127 vmem->requested_size = value;
1128 }
1129 /*
1130 * Trigger a config update so the guest gets notified. We trigger
1131 * even if the size didn't change (especially helpful for debugging).
1132 */
1133 virtio_notify_config(VIRTIO_DEVICE(vmem));
1134 } else {
1135 vmem->requested_size = value;
1136 }
1137 }
1138
1139 static void virtio_mem_get_block_size(Object *obj, Visitor *v, const char *name,
1140 void *opaque, Error **errp)
1141 {
1142 const VirtIOMEM *vmem = VIRTIO_MEM(obj);
1143 uint64_t value = vmem->block_size;
1144
1145 /*
1146 * If not configured by the user (and we're not realized yet), use the
1147 * default block size we would use with the current memory backend.
1148 */
1149 if (!value) {
1150 if (vmem->memdev && memory_region_is_ram(&vmem->memdev->mr)) {
1151 value = virtio_mem_default_block_size(vmem->memdev->mr.ram_block);
1152 } else {
1153 value = virtio_mem_thp_size();
1154 }
1155 }
1156
1157 visit_type_size(v, name, &value, errp);
1158 }
1159
1160 static void virtio_mem_set_block_size(Object *obj, Visitor *v, const char *name,
1161 void *opaque, Error **errp)
1162 {
1163 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1164 Error *err = NULL;
1165 uint64_t value;
1166
1167 if (DEVICE(obj)->realized) {
1168 error_setg(errp, "'%s' cannot be changed", name);
1169 return;
1170 }
1171
1172 visit_type_size(v, name, &value, &err);
1173 if (err) {
1174 error_propagate(errp, err);
1175 return;
1176 }
1177
1178 if (value < VIRTIO_MEM_MIN_BLOCK_SIZE) {
1179 error_setg(errp, "'%s' property has to be at least 0x%" PRIx32, name,
1180 VIRTIO_MEM_MIN_BLOCK_SIZE);
1181 return;
1182 } else if (!is_power_of_2(value)) {
1183 error_setg(errp, "'%s' property has to be a power of two", name);
1184 return;
1185 }
1186 vmem->block_size = value;
1187 }
1188
1189 static void virtio_mem_instance_init(Object *obj)
1190 {
1191 VirtIOMEM *vmem = VIRTIO_MEM(obj);
1192
1193 notifier_list_init(&vmem->size_change_notifiers);
1194 QLIST_INIT(&vmem->rdl_list);
1195
1196 object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
1197 NULL, NULL, NULL);
1198 object_property_add(obj, VIRTIO_MEM_REQUESTED_SIZE_PROP, "size",
1199 virtio_mem_get_requested_size,
1200 virtio_mem_set_requested_size, NULL, NULL);
1201 object_property_add(obj, VIRTIO_MEM_BLOCK_SIZE_PROP, "size",
1202 virtio_mem_get_block_size, virtio_mem_set_block_size,
1203 NULL, NULL);
1204 }
1205
1206 static Property virtio_mem_properties[] = {
1207 DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0),
1208 DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0),
1209 DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false),
1210 DEFINE_PROP_LINK(VIRTIO_MEM_MEMDEV_PROP, VirtIOMEM, memdev,
1211 TYPE_MEMORY_BACKEND, HostMemoryBackend *),
1212 #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS)
1213 DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM,
1214 unplugged_inaccessible, ON_OFF_AUTO_AUTO),
1215 #endif
1216 DEFINE_PROP_END_OF_LIST(),
1217 };
1218
1219 static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
1220 const MemoryRegion *mr)
1221 {
1222 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1223
1224 g_assert(mr == &vmem->memdev->mr);
1225 return vmem->block_size;
1226 }
1227
1228 static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
1229 const MemoryRegionSection *s)
1230 {
1231 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1232 uint64_t start_gpa = vmem->addr + s->offset_within_region;
1233 uint64_t end_gpa = start_gpa + int128_get64(s->size);
1234
1235 g_assert(s->mr == &vmem->memdev->mr);
1236
1237 start_gpa = QEMU_ALIGN_DOWN(start_gpa, vmem->block_size);
1238 end_gpa = QEMU_ALIGN_UP(end_gpa, vmem->block_size);
1239
1240 if (!virtio_mem_valid_range(vmem, start_gpa, end_gpa - start_gpa)) {
1241 return false;
1242 }
1243
1244 return virtio_mem_test_bitmap(vmem, start_gpa, end_gpa - start_gpa, true);
1245 }
1246
/*
 * Context handed to the replay callbacks: the caller-supplied replay
 * function and the opaque pointer to pass back to it.
 */
struct VirtIOMEMReplayData {
    /* Holds a ReplayRamPopulate or ReplayRamDiscard; the callbacks cast. */
    void *fn;
    /* Forwarded unchanged as the last argument of @fn. */
    void *opaque;
};
1251
1252 static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
1253 {
1254 struct VirtIOMEMReplayData *data = arg;
1255
1256 return ((ReplayRamPopulate)data->fn)(s, data->opaque);
1257 }
1258
1259 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
1260 MemoryRegionSection *s,
1261 ReplayRamPopulate replay_fn,
1262 void *opaque)
1263 {
1264 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1265 struct VirtIOMEMReplayData data = {
1266 .fn = replay_fn,
1267 .opaque = opaque,
1268 };
1269
1270 g_assert(s->mr == &vmem->memdev->mr);
1271 return virtio_mem_for_each_plugged_section(vmem, s, &data,
1272 virtio_mem_rdm_replay_populated_cb);
1273 }
1274
1275 static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
1276 void *arg)
1277 {
1278 struct VirtIOMEMReplayData *data = arg;
1279
1280 ((ReplayRamDiscard)data->fn)(s, data->opaque);
1281 return 0;
1282 }
1283
1284 static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
1285 MemoryRegionSection *s,
1286 ReplayRamDiscard replay_fn,
1287 void *opaque)
1288 {
1289 const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1290 struct VirtIOMEMReplayData data = {
1291 .fn = replay_fn,
1292 .opaque = opaque,
1293 };
1294
1295 g_assert(s->mr == &vmem->memdev->mr);
1296 virtio_mem_for_each_unplugged_section(vmem, s, &data,
1297 virtio_mem_rdm_replay_discarded_cb);
1298 }
1299
1300 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
1301 RamDiscardListener *rdl,
1302 MemoryRegionSection *s)
1303 {
1304 VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1305 int ret;
1306
1307 g_assert(s->mr == &vmem->memdev->mr);
1308 rdl->section = memory_region_section_new_copy(s);
1309
1310 QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
1311 ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1312 virtio_mem_notify_populate_cb);
1313 if (ret) {
1314 error_report("%s: Replaying plugged ranges failed: %s", __func__,
1315 strerror(-ret));
1316 }
1317 }
1318
1319 static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
1320 RamDiscardListener *rdl)
1321 {
1322 VirtIOMEM *vmem = VIRTIO_MEM(rdm);
1323
1324 g_assert(rdl->section->mr == &vmem->memdev->mr);
1325 if (vmem->size) {
1326 if (rdl->double_discard_supported) {
1327 rdl->notify_discard(rdl, rdl->section);
1328 } else {
1329 virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
1330 virtio_mem_notify_discard_cb);
1331 }
1332 }
1333
1334 memory_region_section_free_copy(rdl->section);
1335 rdl->section = NULL;
1336 QLIST_REMOVE(rdl, next);
1337 }
1338
1339 static void virtio_mem_class_init(ObjectClass *klass, void *data)
1340 {
1341 DeviceClass *dc = DEVICE_CLASS(klass);
1342 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
1343 VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
1344 RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
1345
1346 device_class_set_props(dc, virtio_mem_properties);
1347 dc->vmsd = &vmstate_virtio_mem;
1348
1349 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
1350 vdc->realize = virtio_mem_device_realize;
1351 vdc->unrealize = virtio_mem_device_unrealize;
1352 vdc->get_config = virtio_mem_get_config;
1353 vdc->get_features = virtio_mem_get_features;
1354 vdc->validate_features = virtio_mem_validate_features;
1355 vdc->vmsd = &vmstate_virtio_mem_device;
1356
1357 vmc->fill_device_info = virtio_mem_fill_device_info;
1358 vmc->get_memory_region = virtio_mem_get_memory_region;
1359 vmc->add_size_change_notifier = virtio_mem_add_size_change_notifier;
1360 vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
1361
1362 rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
1363 rdmc->is_populated = virtio_mem_rdm_is_populated;
1364 rdmc->replay_populated = virtio_mem_rdm_replay_populated;
1365 rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
1366 rdmc->register_listener = virtio_mem_rdm_register_listener;
1367 rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
1368 }
1369
1370 static const TypeInfo virtio_mem_info = {
1371 .name = TYPE_VIRTIO_MEM,
1372 .parent = TYPE_VIRTIO_DEVICE,
1373 .instance_size = sizeof(VirtIOMEM),
1374 .instance_init = virtio_mem_instance_init,
1375 .class_init = virtio_mem_class_init,
1376 .class_size = sizeof(VirtIOMEMClass),
1377 .interfaces = (InterfaceInfo[]) {
1378 { TYPE_RAM_DISCARD_MANAGER },
1379 { }
1380 },
1381 };
1382
1383 static void virtio_register_types(void)
1384 {
1385 type_register_static(&virtio_mem_info);
1386 }
1387
1388 type_init(virtio_register_types)