configure: fix --meson=/path/to/meson
[qemu.git] / migration / ram.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <quintela@redhat.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
28
29 #include "qemu/osdep.h"
30 #include "cpu.h"
31 #include "qemu/cutils.h"
32 #include "qemu/bitops.h"
33 #include "qemu/bitmap.h"
34 #include "qemu/main-loop.h"
35 #include "xbzrle.h"
36 #include "ram.h"
37 #include "migration.h"
38 #include "migration/register.h"
39 #include "migration/misc.h"
40 #include "qemu-file.h"
41 #include "postcopy-ram.h"
42 #include "page_cache.h"
43 #include "qemu/error-report.h"
44 #include "qapi/error.h"
45 #include "qapi/qapi-types-migration.h"
46 #include "qapi/qapi-events-migration.h"
47 #include "qapi/qmp/qerror.h"
48 #include "trace.h"
49 #include "exec/ram_addr.h"
50 #include "exec/target_page.h"
51 #include "qemu/rcu_queue.h"
52 #include "migration/colo.h"
53 #include "block.h"
54 #include "sysemu/sysemu.h"
55 #include "sysemu/cpu-throttle.h"
56 #include "savevm.h"
57 #include "qemu/iov.h"
58 #include "multifd.h"
59
60 /***********************************************************/
61 /* ram save/restore */
62
/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
 * worked for pages that were filled with the same char.  We switched
 * it to only search for the zero value.  And to avoid confusion with
 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
 */
68
69 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
70 #define RAM_SAVE_FLAG_ZERO 0x02
71 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
72 #define RAM_SAVE_FLAG_PAGE 0x08
73 #define RAM_SAVE_FLAG_EOS 0x10
74 #define RAM_SAVE_FLAG_CONTINUE 0x20
75 #define RAM_SAVE_FLAG_XBZRLE 0x40
76 /* 0x80 is reserved in migration.h start with 0x100 next */
77 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
78
/* Return true iff all @size bytes starting at @p are zero. */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    bool all_zero = buffer_is_zero(p, size);

    return all_zero;
}
83
/* Global statistics counters for the XBZRLE cache. */
XBZRLECacheStats xbzrle_counters;

/*
 * All XBZRLE state: the page cache plus the scratch buffers used while
 * encoding/decoding.  Mutable fields are protected by @lock, taken via
 * XBZRLE_cache_lock()/XBZRLE_cache_unlock().
 */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* scratch copy of the guest page currently being encoded */
    uint8_t *current_buf;
    /* Cache of previously-sent pages for XBZRLE; protected by @lock */
    PageCache *cache;
    QemuMutex lock;
    /* a page kept permanently full of zeros */
    uint8_t *zero_target_page;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
} XBZRLE;
101
102 static void XBZRLE_cache_lock(void)
103 {
104 if (migrate_use_xbzrle())
105 qemu_mutex_lock(&XBZRLE.lock);
106 }
107
108 static void XBZRLE_cache_unlock(void)
109 {
110 if (migrate_use_xbzrle())
111 qemu_mutex_unlock(&XBZRLE.lock);
112 }
113
114 /**
115 * xbzrle_cache_resize: resize the xbzrle cache
116 *
117 * This function is called from qmp_migrate_set_cache_size in main
118 * thread, possibly while a migration is in progress. A running
119 * migration may be using the cache and might finish during this call,
120 * hence changes to the cache are protected by XBZRLE.lock().
121 *
122 * Returns 0 for success or -1 for error
123 *
124 * @new_size: new cache size
125 * @errp: set *errp if the check failed, with reason
126 */
127 int xbzrle_cache_resize(int64_t new_size, Error **errp)
128 {
129 PageCache *new_cache;
130 int64_t ret = 0;
131
132 /* Check for truncation */
133 if (new_size != (size_t)new_size) {
134 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
135 "exceeding address space");
136 return -1;
137 }
138
139 if (new_size == migrate_xbzrle_cache_size()) {
140 /* nothing to do */
141 return 0;
142 }
143
144 XBZRLE_cache_lock();
145
146 if (XBZRLE.cache != NULL) {
147 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
148 if (!new_cache) {
149 ret = -1;
150 goto out;
151 }
152
153 cache_fini(XBZRLE.cache);
154 XBZRLE.cache = new_cache;
155 }
156 out:
157 XBZRLE_cache_unlock();
158 return ret;
159 }
160
161 bool ramblock_is_ignored(RAMBlock *block)
162 {
163 return !qemu_ram_is_migratable(block) ||
164 (migrate_ignore_shared() && qemu_ram_is_shared(block));
165 }
166
167 #undef RAMBLOCK_FOREACH
168
169 int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque)
170 {
171 RAMBlock *block;
172 int ret = 0;
173
174 RCU_READ_LOCK_GUARD();
175
176 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
177 ret = func(block, opaque);
178 if (ret) {
179 break;
180 }
181 }
182 return ret;
183 }
184
185 static void ramblock_recv_map_init(void)
186 {
187 RAMBlock *rb;
188
189 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
190 assert(!rb->receivedmap);
191 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
192 }
193 }
194
195 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
196 {
197 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
198 rb->receivedmap);
199 }
200
201 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
202 {
203 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
204 }
205
206 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
207 {
208 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
209 }
210
211 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
212 size_t nr)
213 {
214 bitmap_set_atomic(rb->receivedmap,
215 ramblock_recv_bitmap_offset(host_addr, rb),
216 nr);
217 }
218
219 #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
220
221 /*
222 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
223 *
224 * Returns >0 if success with sent bytes, or <0 if error.
225 */
/*
 * Send the receivedmap of one RAMBlock back to the source.
 *
 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes) +
 * RAMBLOCK_RECV_BITMAP_ENDING (8 bytes).
 *
 * Returns >0 if success with sent bytes, or <0 if error.
 *
 * @file: stream to write to
 * @block_name: name of the block whose bitmap is sent
 */
int64_t ramblock_recv_bitmap_send(QEMUFile *file,
                                  const char *block_name)
{
    RAMBlock *block = qemu_ram_block_by_name(block_name);
    unsigned long *le_bitmap, nbits;
    uint64_t size;

    if (!block) {
        error_report("%s: invalid block name: %s", __func__, block_name);
        return -1;
    }

    nbits = block->used_length >> TARGET_PAGE_BITS;

    /*
     * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
     * machines we may need 4 more bytes for padding (see below
     * comment). So extend it a bit before hand.
     */
    le_bitmap = bitmap_new(nbits + BITS_PER_LONG);

    /*
     * Always use little endian when sending the bitmap. This is
     * required so that source and destination VMs with different host
     * endianness still agree on the layout. (Note: big endian won't
     * work.)
     */
    bitmap_to_le(le_bitmap, block->receivedmap, nbits);

    /* Size of the bitmap, in bytes */
    size = DIV_ROUND_UP(nbits, 8);

    /*
     * size is always aligned to 8 bytes for 64bit machines, but it
     * may not be true for 32bit machines. We need this padding to
     * make sure the migration can survive even between 32bit and
     * 64bit machines.
     */
    size = ROUND_UP(size, 8);

    qemu_put_be64(file, size);
    qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
    /*
     * Mark as an end, in case the middle part is screwed up due to
     * some "mysterious" reason.
     */
    qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
    qemu_fflush(file);

    g_free(le_bitmap);

    if (qemu_file_get_error(file)) {
        return qemu_file_get_error(file);
    }

    /* Payload bytes plus the 8-byte size field (the ending marker is
     * deliberately not counted here). */
    return size + sizeof(size);
}
282
/*
 * An outstanding page request, on the source, having been received
 * and queued
 */
struct RAMSrcPageRequest {
    /* block the requested pages live in */
    RAMBlock *rb;
    /* start of the requested range within @rb */
    hwaddr offset;
    /* length of the requested range */
    hwaddr len;

    /* link in RAMState.src_page_requests */
    QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
};
294
/* State of RAM for migration */
struct RAMState {
    /* QEMUFile used for this migration */
    QEMUFile *f;
    /* Last block that we have visited searching for dirty pages */
    RAMBlock *last_seen_block;
    /* Last block from where we have sent data */
    RAMBlock *last_sent_block;
    /* Last dirty target page we have sent */
    ram_addr_t last_page;
    /* last ram version we have seen */
    uint32_t last_version;
    /* We are in the first round */
    bool ram_bulk_stage;
    /* The free page optimization is enabled */
    bool fpo_enabled;
    /* How many times we have dirty too many pages */
    int dirty_rate_high_cnt;
    /* these variables are used for bitmap sync */
    /* last time we did a full bitmap_sync */
    int64_t time_last_bitmap_sync;
    /* bytes transferred at the start of the current period */
    uint64_t bytes_xfer_prev;
    /* number of dirty pages since the start of the current period */
    uint64_t num_dirty_pages_period;
    /* xbzrle misses since the beginning of the period */
    uint64_t xbzrle_cache_miss_prev;
    /* Amount of xbzrle pages since the beginning of the period */
    uint64_t xbzrle_pages_prev;
    /* Amount of xbzrle encoded bytes since the beginning of the period */
    uint64_t xbzrle_bytes_prev;

    /* compression statistics since the beginning of the period */
    /* snapshot of compression_counters.busy at the period start */
    uint64_t compress_thread_busy_prev;
    /* snapshot of compression_counters.compressed_size at the period start */
    uint64_t compressed_size_prev;
    /* snapshot of compression_counters.pages at the period start */
    uint64_t compress_pages_prev;

    /* total handled target pages at the beginning of period */
    uint64_t target_page_count_prev;
    /* total handled target pages since start */
    uint64_t target_page_count;
    /* number of dirty bits in the bitmap */
    uint64_t migration_dirty_pages;
    /* Protects modification of the bitmap and migration dirty pages */
    QemuMutex bitmap_mutex;
    /* The RAMBlock used in the last src_page_requests */
    RAMBlock *last_req_rb;
    /* Queue of outstanding page requests from the destination */
    QemuMutex src_page_req_mutex;
    QSIMPLEQ_HEAD(, RAMSrcPageRequest) src_page_requests;
};
typedef struct RAMState RAMState;

/* Singleton migration RAM state; may be NULL (callers such as
 * ram_bytes_remaining() check for that). */
static RAMState *ram_state;

/* Notifiers invoked around each precopy bitmap sync (see precopy_notify()) */
static NotifierWithReturnList precopy_notifier_list;
354
/* One-time initialization of the precopy notifier list. */
void precopy_infrastructure_init(void)
{
    notifier_with_return_list_init(&precopy_notifier_list);
}
359
/* Register @n to be called around precopy events (see precopy_notify()). */
void precopy_add_notifier(NotifierWithReturn *n)
{
    notifier_with_return_list_add(&precopy_notifier_list, n);
}
364
/* Unregister a notifier previously added with precopy_add_notifier(). */
void precopy_remove_notifier(NotifierWithReturn *n)
{
    notifier_with_return_remove(n);
}
369
370 int precopy_notify(PrecopyNotifyReason reason, Error **errp)
371 {
372 PrecopyNotifyData pnd;
373 pnd.reason = reason;
374 pnd.errp = errp;
375
376 return notifier_with_return_list_notify(&precopy_notifier_list, &pnd);
377 }
378
379 void precopy_enable_free_page_optimization(void)
380 {
381 if (!ram_state) {
382 return;
383 }
384
385 ram_state->fpo_enabled = true;
386 }
387
388 uint64_t ram_bytes_remaining(void)
389 {
390 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
391 0;
392 }
393
/* Global RAM migration counters */
MigrationStats ram_counters;

/* used by the search for pages to send */
struct PageSearchStatus {
    /* Current block being searched */
    RAMBlock *block;
    /* Current page to search from */
    unsigned long page;
    /* Set once we wrap around */
    bool complete_round;
};
typedef struct PageSearchStatus PageSearchStatus;

/* Global compression statistics counters */
CompressionStats compression_counters;
408
/* Per-thread state for multi-threaded page compression */
struct CompressParam {
    /* thread finished compressing the posted page */
    bool done;
    /* ask the thread to exit */
    bool quit;
    /* result: the posted page turned out to be all zeros */
    bool zero_page;
    /* buffer-only QEMUFile the compressed data is written into */
    QEMUFile *file;
    QemuMutex mutex;
    QemuCond cond;
    /* page to compress; block == NULL means no work posted */
    RAMBlock *block;
    ram_addr_t offset;

    /* internally used fields */
    z_stream stream;
    uint8_t *originbuf;
};
typedef struct CompressParam CompressParam;

/* Per-thread state for multi-threaded page decompression */
struct DecompressParam {
    bool done;
    bool quit;
    QemuMutex mutex;
    QemuCond cond;
    /* destination of the decompressed data */
    void *des;
    /* input buffer holding @len bytes of compressed data */
    uint8_t *compbuf;
    int len;
    z_stream stream;
};
typedef struct DecompressParam DecompressParam;

static CompressParam *comp_param;
static QemuThread *compress_threads;
/* comp_done_cond is used to wake up the migration thread when
 * one of the compression threads has finished the compression.
 * comp_done_lock is used to co-work with comp_done_cond.
 */
static QemuMutex comp_done_lock;
static QemuCond comp_done_cond;
/* The empty QEMUFileOps will be used by file in CompressParam */
static const QEMUFileOps empty_ops = { };

static QEMUFile *decomp_file;
static DecompressParam *decomp_param;
static QemuThread *decompress_threads;
static QemuMutex decomp_done_lock;
static QemuCond decomp_done_cond;
453
454 static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
455 ram_addr_t offset, uint8_t *source_buf);
456
/*
 * Worker loop for one compression thread.
 *
 * Waits on param->cond until a page is posted (param->block non-NULL)
 * or param->quit is set.  The actual compression runs without holding
 * param->mutex; completion is published under comp_done_lock so the
 * migration thread waiting on comp_done_cond sees done/zero_page
 * consistently.
 *
 * @opaque: the CompressParam slot owned by this thread
 */
static void *do_data_compress(void *opaque)
{
    CompressParam *param = opaque;
    RAMBlock *block;
    ram_addr_t offset;
    bool zero_page;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->block) {
            /* Claim the request; NULL marks the slot idle again */
            block = param->block;
            offset = param->offset;
            param->block = NULL;
            /* Drop the lock while doing the expensive compression */
            qemu_mutex_unlock(&param->mutex);

            zero_page = do_compress_ram_page(param->file, &param->stream,
                                             block, offset, param->originbuf);

            qemu_mutex_lock(&comp_done_lock);
            param->done = true;
            param->zero_page = zero_page;
            qemu_cond_signal(&comp_done_cond);
            qemu_mutex_unlock(&comp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
490
/*
 * Stop the compression threads and free all their resources.
 *
 * Also safe after a partial setup: slots whose .file was never created
 * are skipped, because compress_threads_save_setup() creates .file last.
 */
static void compress_threads_save_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression() || !comp_param) {
        return;
    }

    thread_count = migrate_compress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * .file doubles as the indicator of whether this slot was
         * fully initialized by setup
         */
        if (!comp_param[i].file) {
            break;
        }

        /* Ask the thread to quit, wake it, and wait for it to exit */
        qemu_mutex_lock(&comp_param[i].mutex);
        comp_param[i].quit = true;
        qemu_cond_signal(&comp_param[i].cond);
        qemu_mutex_unlock(&comp_param[i].mutex);

        qemu_thread_join(compress_threads + i);
        qemu_mutex_destroy(&comp_param[i].mutex);
        qemu_cond_destroy(&comp_param[i].cond);
        deflateEnd(&comp_param[i].stream);
        g_free(comp_param[i].originbuf);
        qemu_fclose(comp_param[i].file);
        comp_param[i].file = NULL;
    }
    qemu_mutex_destroy(&comp_done_lock);
    qemu_cond_destroy(&comp_done_cond);
    g_free(compress_threads);
    g_free(comp_param);
    compress_threads = NULL;
    comp_param = NULL;
}
529
/*
 * Create the compression worker threads.
 *
 * Returns 0 on success (or when compression is disabled), -1 on
 * failure after undoing any partial initialization via
 * compress_threads_save_cleanup().
 */
static int compress_threads_save_setup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return 0;
    }
    thread_count = migrate_compress_threads();
    compress_threads = g_new0(QemuThread, thread_count);
    comp_param = g_new0(CompressParam, thread_count);
    qemu_cond_init(&comp_done_cond);
    qemu_mutex_init(&comp_done_lock);
    for (i = 0; i < thread_count; i++) {
        comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
        if (!comp_param[i].originbuf) {
            goto exit;
        }

        if (deflateInit(&comp_param[i].stream,
                        migrate_compress_level()) != Z_OK) {
            g_free(comp_param[i].originbuf);
            goto exit;
        }

        /* comp_param[i].file is just used as a dummy buffer to save data,
         * set its ops to empty.  It also serves as the "slot initialized"
         * marker checked by compress_threads_save_cleanup().
         */
        comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
        comp_param[i].done = true;
        comp_param[i].quit = false;
        qemu_mutex_init(&comp_param[i].mutex);
        qemu_cond_init(&comp_param[i].cond);
        qemu_thread_create(compress_threads + i, "compress",
                           do_data_compress, comp_param + i,
                           QEMU_THREAD_JOINABLE);
    }
    return 0;

exit:
    compress_threads_save_cleanup();
    return -1;
}
572
573 /**
574 * save_page_header: write page header to wire
575 *
576 * If this is the 1st block, it also writes the block identification
577 *
578 * Returns the number of bytes written
579 *
580 * @f: QEMUFile where to send the data
581 * @block: block that contains the page we want to send
582 * @offset: offset inside the block for the page
583 * in the lower bits, it contains flags
584 */
static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
                               ram_addr_t offset)
{
    size_t size, len;

    /* Same block as the previous page: tell the destination to reuse
     * the last block name rather than resending it */
    if (block == rs->last_sent_block) {
        offset |= RAM_SAVE_FLAG_CONTINUE;
    }
    qemu_put_be64(f, offset);
    size = 8;

    if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
        /* New block: emit idstr as a length-prefixed string */
        len = strlen(block->idstr);
        qemu_put_byte(f, len);
        qemu_put_buffer(f, (uint8_t *)block->idstr, len);
        size += 1 + len;
        rs->last_sent_block = block;
    }
    return size;
}
605
606 /**
607 * mig_throttle_guest_down: throotle down the guest
608 *
609 * Reduce amount of guest cpu execution to hopefully slow down memory
610 * writes. If guest dirty memory rate is reduced below the rate at
611 * which we can transfer pages to the destination then we should be
612 * able to complete migration. Some workloads dirty memory way too
613 * fast and will not effectively converge, even with auto-converge.
614 */
615 static void mig_throttle_guest_down(uint64_t bytes_dirty_period,
616 uint64_t bytes_dirty_threshold)
617 {
618 MigrationState *s = migrate_get_current();
619 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
620 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
621 bool pct_tailslow = s->parameters.cpu_throttle_tailslow;
622 int pct_max = s->parameters.max_cpu_throttle;
623
624 uint64_t throttle_now = cpu_throttle_get_percentage();
625 uint64_t cpu_now, cpu_ideal, throttle_inc;
626
627 /* We have not started throttling yet. Let's start it. */
628 if (!cpu_throttle_active()) {
629 cpu_throttle_set(pct_initial);
630 } else {
631 /* Throttling already on, just increase the rate */
632 if (!pct_tailslow) {
633 throttle_inc = pct_increment;
634 } else {
635 /* Compute the ideal CPU percentage used by Guest, which may
636 * make the dirty rate match the dirty rate threshold. */
637 cpu_now = 100 - throttle_now;
638 cpu_ideal = cpu_now * (bytes_dirty_threshold * 1.0 /
639 bytes_dirty_period);
640 throttle_inc = MIN(cpu_now - cpu_ideal, pct_increment);
641 }
642 cpu_throttle_set(MIN(throttle_now + throttle_inc, pct_max));
643 }
644 }
645
646 /**
647 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
648 *
649 * @rs: current RAM state
650 * @current_addr: address for the zero page
651 *
652 * Update the xbzrle cache to reflect a page that's been sent as all 0.
653 * The important thing is that a stale (not-yet-0'd) page be replaced
654 * by the new data.
655 * As a bonus, if the page wasn't in the cache it gets added so that
656 * when a small write is made into the 0'd page it gets XBZRLE sent.
657 */
658 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
659 {
660 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
661 return;
662 }
663
664 /* We don't care if this fails to allocate a new cache page
665 * as long as it updated an old one */
666 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
667 ram_counters.dirty_sync_count);
668 }
669
670 #define ENCODING_FLAG_XBZRLE 0x1
671
672 /**
673 * save_xbzrle_page: compress and send current page
674 *
675 * Returns: 1 means that we wrote the page
676 * 0 means that page is identical to the one already sent
677 * -1 means that xbzrle would be longer than normal
678 *
679 * @rs: current RAM state
680 * @current_data: pointer to the address of the page contents
681 * @current_addr: addr of the page
682 * @block: block that contains the page we want to send
683 * @offset: offset inside the block for the page
684 * @last_stage: if we are at the completion stage
685 */
static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, bool last_stage)
{
    int encoded_len = 0, bytes_xbzrle;
    uint8_t *prev_cached_page;

    /* Cache miss: insert the page (unless completing) and fall back to
     * sending it uncompressed (-1) */
    if (!cache_is_cached(XBZRLE.cache, current_addr,
                         ram_counters.dirty_sync_count)) {
        xbzrle_counters.cache_miss++;
        if (!last_stage) {
            if (cache_insert(XBZRLE.cache, current_addr, *current_data,
                             ram_counters.dirty_sync_count) == -1) {
                return -1;
            } else {
                /* update *current_data when the page has been
                   inserted into cache */
                *current_data = get_cached_data(XBZRLE.cache, current_addr);
            }
        }
        return -1;
    }

    /*
     * Reaching here means the page has hit the xbzrle cache, no matter what
     * encoding result it is (normal encoding, overflow or skipping the page),
     * count the page as encoded. This is used to calculate the encoding rate.
     *
     * Example: 2 pages (8KB) being encoded, first page encoding generates 2KB,
     * 2nd page turns out to be skipped (i.e. no new bytes written to the
     * page), the overall encoding rate will be 8KB / 2KB = 4, which has the
     * skipped page included. In this way, the encoding rate can tell if the
     * guest page is good for xbzrle encoding.
     */
    xbzrle_counters.pages++;
    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);

    /*
     * Update the cache contents, so that it corresponds to the data
     * sent, in all cases except where we skip the page.
     */
    if (!last_stage && encoded_len != 0) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
        /*
         * In the case where we couldn't compress, ensure that the caller
         * sends the data from the cache, since the guest might have
         * changed the RAM since we copied it.
         */
        *current_data = prev_cached_page;
    }

    /* encoded_len == 0: page identical to the cached copy, nothing to send;
     * encoded_len == -1: encoding overflowed, send the page normally */
    if (encoded_len == 0) {
        trace_save_xbzrle_page_skipping();
        return 0;
    } else if (encoded_len == -1) {
        trace_save_xbzrle_page_overflow();
        xbzrle_counters.overflow++;
        xbzrle_counters.bytes += TARGET_PAGE_SIZE;
        return -1;
    }

    /* Send XBZRLE based compressed page */
    bytes_xbzrle = save_page_header(rs, rs->f, block,
                                    offset | RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(rs->f, encoded_len);
    qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
    bytes_xbzrle += encoded_len + 1 + 2;
    /*
     * Like compressed_size (please see update_compress_thread_counts),
     * the xbzrle encoded bytes don't count the 8 byte header with
     * RAM_SAVE_FLAG_CONTINUE.
     */
    xbzrle_counters.bytes += bytes_xbzrle - 8;
    ram_counters.transferred += bytes_xbzrle;

    return 1;
}
772
773 /**
774 * migration_bitmap_find_dirty: find the next dirty page from start
775 *
776 * Returns the page offset within memory region of the start of a dirty page
777 *
778 * @rs: current RAM state
779 * @rb: RAMBlock where to search for dirty pages
780 * @start: page where we start the search
781 */
782 static inline
783 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
784 unsigned long start)
785 {
786 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
787 unsigned long *bitmap = rb->bmap;
788 unsigned long next;
789
790 if (ramblock_is_ignored(rb)) {
791 return size;
792 }
793
794 /*
795 * When the free page optimization is enabled, we need to check the bitmap
796 * to send the non-free pages rather than all the pages in the bulk stage.
797 */
798 if (!rs->fpo_enabled && rs->ram_bulk_stage && start > 0) {
799 next = start + 1;
800 } else {
801 next = find_next_bit(bitmap, size, start);
802 }
803
804 return next;
805 }
806
/*
 * Test-and-clear the dirty bit of @page in @rb, under rs->bitmap_mutex.
 *
 * Returns true if the page was dirty (and rs->migration_dirty_pages is
 * decremented accordingly).
 */
static inline bool migration_bitmap_clear_dirty(RAMState *rs,
                                                RAMBlock *rb,
                                                unsigned long page)
{
    bool ret;

    qemu_mutex_lock(&rs->bitmap_mutex);

    /*
     * Clear dirty bitmap if needed. This _must_ be called before we
     * send any of the page in the chunk because we need to make sure
     * we can capture further page content changes when we sync dirty
     * log the next time. So as long as we are going to send any of
     * the page in the chunk we clear the remote dirty bitmap for all.
     * Clearing it earlier won't be a problem, but too late will.
     */
    if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
        uint8_t shift = rb->clear_bmap_shift;
        hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
        hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size);

        /*
         * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this
         * can make things easier sometimes since then start address
         * of the small chunk will always be 64 pages aligned so the
         * bitmap will always be aligned to unsigned long. We should
         * even be able to remove this restriction but I'm simply
         * keeping it.
         */
        assert(shift >= 6);
        trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
        memory_region_clear_dirty_bitmap(rb->mr, start, size);
    }

    ret = test_and_clear_bit(page, rb->bmap);

    if (ret) {
        rs->migration_dirty_pages--;
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    return ret;
}
850
851 /* Called with RCU critical section */
852 static void ramblock_sync_dirty_bitmap(RAMState *rs, RAMBlock *rb)
853 {
854 uint64_t new_dirty_pages =
855 cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);
856
857 rs->migration_dirty_pages += new_dirty_pages;
858 rs->num_dirty_pages_period += new_dirty_pages;
859 }
860
861 /**
862 * ram_pagesize_summary: calculate all the pagesizes of a VM
863 *
864 * Returns a summary bitmap of the page sizes of all RAMBlocks
865 *
866 * For VMs with just normal pages this is equivalent to the host page
867 * size. If it's got some huge pages then it's the OR of all the
868 * different page sizes.
869 */
870 uint64_t ram_pagesize_summary(void)
871 {
872 RAMBlock *block;
873 uint64_t summary = 0;
874
875 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
876 summary |= block->page_size;
877 }
878
879 return summary;
880 }
881
882 uint64_t ram_get_total_transferred_pages(void)
883 {
884 return ram_counters.normal + ram_counters.duplicate +
885 compression_counters.pages + xbzrle_counters.pages;
886 }
887
/*
 * Recompute the per-period rate statistics (dirty page rate, xbzrle
 * cache-miss/encoding rates, compression busy/compression rates) at the
 * end of a sync period ending at @end_time.
 */
static void migration_update_rates(RAMState *rs, int64_t end_time)
{
    uint64_t page_count = rs->target_page_count - rs->target_page_count_prev;
    double compressed_size;

    /* calculate period counters */
    ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
                / (end_time - rs->time_last_bitmap_sync);

    /* No pages handled this period: the remaining rates are undefined */
    if (!page_count) {
        return;
    }

    if (migrate_use_xbzrle()) {
        double encoded_size, unencoded_size;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / page_count;
        rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
        unencoded_size = (xbzrle_counters.pages - rs->xbzrle_pages_prev) *
                         TARGET_PAGE_SIZE;
        encoded_size = xbzrle_counters.bytes - rs->xbzrle_bytes_prev;
        /* Guard against a division by zero when nothing was encoded */
        if (xbzrle_counters.pages == rs->xbzrle_pages_prev || !encoded_size) {
            xbzrle_counters.encoding_rate = 0;
        } else {
            xbzrle_counters.encoding_rate = unencoded_size / encoded_size;
        }
        rs->xbzrle_pages_prev = xbzrle_counters.pages;
        rs->xbzrle_bytes_prev = xbzrle_counters.bytes;
    }

    if (migrate_use_compression()) {
        compression_counters.busy_rate = (double)(compression_counters.busy -
            rs->compress_thread_busy_prev) / page_count;
        rs->compress_thread_busy_prev = compression_counters.busy;

        compressed_size = compression_counters.compressed_size -
                          rs->compressed_size_prev;
        if (compressed_size) {
            double uncompressed_size = (compression_counters.pages -
                                    rs->compress_pages_prev) * TARGET_PAGE_SIZE;

            /* Compression-Ratio = Uncompressed-size / Compressed-size */
            compression_counters.compression_rate =
                                        uncompressed_size / compressed_size;

            rs->compress_pages_prev = compression_counters.pages;
            rs->compressed_size_prev = compression_counters.compressed_size;
        }
    }
}
939
940 static void migration_trigger_throttle(RAMState *rs)
941 {
942 MigrationState *s = migrate_get_current();
943 uint64_t threshold = s->parameters.throttle_trigger_threshold;
944
945 uint64_t bytes_xfer_period = ram_counters.transferred - rs->bytes_xfer_prev;
946 uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE;
947 uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
948
949 /* During block migration the auto-converge logic incorrectly detects
950 * that ram migration makes no progress. Avoid this by disabling the
951 * throttling logic during the bulk phase of block migration. */
952 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
953 /* The following detection logic can be refined later. For now:
954 Check to see if the ratio between dirtied bytes and the approx.
955 amount of bytes that just got transferred since the last time
956 we were in this routine reaches the threshold. If that happens
957 twice, start or increase throttling. */
958
959 if ((bytes_dirty_period > bytes_dirty_threshold) &&
960 (++rs->dirty_rate_high_cnt >= 2)) {
961 trace_migration_throttle();
962 rs->dirty_rate_high_cnt = 0;
963 mig_throttle_guest_down(bytes_dirty_period,
964 bytes_dirty_threshold);
965 }
966 }
967 }
968
/*
 * Synchronize the dirty log of every RAM block into the migration
 * bitmaps, then update period statistics and (at most once per second)
 * re-evaluate auto-converge throttling.
 */
static void migration_bitmap_sync(RAMState *rs)
{
    RAMBlock *block;
    int64_t end_time;

    ram_counters.dirty_sync_count++;

    /* First sync: establish the start of the first period */
    if (!rs->time_last_bitmap_sync) {
        rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    }

    trace_migration_bitmap_sync_start();
    memory_global_dirty_log_sync();

    qemu_mutex_lock(&rs->bitmap_mutex);
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            ramblock_sync_dirty_bitmap(rs, block);
        }
        ram_counters.remaining = ram_bytes_remaining();
    }
    qemu_mutex_unlock(&rs->bitmap_mutex);

    memory_global_after_dirty_log_sync();
    trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);

    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > rs->time_last_bitmap_sync + 1000) {
        migration_trigger_throttle(rs);

        migration_update_rates(rs, end_time);

        rs->target_page_count_prev = rs->target_page_count;

        /* reset period counters */
        rs->time_last_bitmap_sync = end_time;
        rs->num_dirty_pages_period = 0;
        rs->bytes_xfer_prev = ram_counters.transferred;
    }
    if (migrate_use_events()) {
        qapi_event_send_migration_pass(ram_counters.dirty_sync_count);
    }
}
1014
/*
 * migration_bitmap_sync() wrapper that fires the precopy notifiers
 * before and after the sync.
 */
static void migration_bitmap_sync_precopy(RAMState *rs)
{
    Error *local_err = NULL;

    /*
     * The current notifier usage is just an optimization to migration, so we
     * don't stop the normal migration process in the error case.
     */
    if (precopy_notify(PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
        /* error_report_err() freed it; reset for the second notify */
        local_err = NULL;
    }

    migration_bitmap_sync(rs);

    if (precopy_notify(PRECOPY_NOTIFY_AFTER_BITMAP_SYNC, &local_err)) {
        error_report_err(local_err);
    }
}
1034
1035 /**
1036 * save_zero_page_to_file: send the zero page to the file
1037 *
1038 * Returns the size of data written to the file, 0 means the page is not
1039 * a zero page
1040 *
1041 * @rs: current RAM state
1042 * @file: the file where the data is saved
1043 * @block: block that contains the page we want to send
1044 * @offset: offset inside the block for the page
1045 */
1046 static int save_zero_page_to_file(RAMState *rs, QEMUFile *file,
1047 RAMBlock *block, ram_addr_t offset)
1048 {
1049 uint8_t *p = block->host + offset;
1050 int len = 0;
1051
1052 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1053 len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO);
1054 qemu_put_byte(file, 0);
1055 len += 1;
1056 }
1057 return len;
1058 }
1059
1060 /**
1061 * save_zero_page: send the zero page to the stream
1062 *
1063 * Returns the number of pages written.
1064 *
1065 * @rs: current RAM state
1066 * @block: block that contains the page we want to send
1067 * @offset: offset inside the block for the page
1068 */
1069 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1070 {
1071 int len = save_zero_page_to_file(rs, rs->f, block, offset);
1072
1073 if (len) {
1074 ram_counters.duplicate++;
1075 ram_counters.transferred += len;
1076 return 1;
1077 }
1078 return -1;
1079 }
1080
1081 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
1082 {
1083 if (!migrate_release_ram() || !migration_in_postcopy()) {
1084 return;
1085 }
1086
1087 ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS);
1088 }
1089
1090 /*
1091 * @pages: the number of pages written by the control path,
1092 * < 0 - error
1093 * > 0 - number of pages written
1094 *
1095 * Return true if the pages has been saved, otherwise false is returned.
1096 */
1097 static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1098 int *pages)
1099 {
1100 uint64_t bytes_xmit = 0;
1101 int ret;
1102
1103 *pages = -1;
1104 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1105 &bytes_xmit);
1106 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1107 return false;
1108 }
1109
1110 if (bytes_xmit) {
1111 ram_counters.transferred += bytes_xmit;
1112 *pages = 1;
1113 }
1114
1115 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1116 return true;
1117 }
1118
1119 if (bytes_xmit > 0) {
1120 ram_counters.normal++;
1121 } else if (bytes_xmit == 0) {
1122 ram_counters.duplicate++;
1123 }
1124
1125 return true;
1126 }
1127
1128 /*
1129 * directly send the page to the stream
1130 *
1131 * Returns the number of pages written.
1132 *
1133 * @rs: current RAM state
1134 * @block: block that contains the page we want to send
1135 * @offset: offset inside the block for the page
1136 * @buf: the page to be sent
1137 * @async: send to page asyncly
1138 */
1139 static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1140 uint8_t *buf, bool async)
1141 {
1142 ram_counters.transferred += save_page_header(rs, rs->f, block,
1143 offset | RAM_SAVE_FLAG_PAGE);
1144 if (async) {
1145 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1146 migrate_release_ram() &
1147 migration_in_postcopy());
1148 } else {
1149 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1150 }
1151 ram_counters.transferred += TARGET_PAGE_SIZE;
1152 ram_counters.normal++;
1153 return 1;
1154 }
1155
1156 /**
1157 * ram_save_page: send the given page to the stream
1158 *
1159 * Returns the number of pages written.
1160 * < 0 - error
1161 * >=0 - Number of pages written - this might legally be 0
1162 * if xbzrle noticed the page was the same.
1163 *
1164 * @rs: current RAM state
1165 * @block: block that contains the page we want to send
1166 * @offset: offset inside the block for the page
1167 * @last_stage: if we are at the completion stage
1168 */
1169 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
1170 {
1171 int pages = -1;
1172 uint8_t *p;
1173 bool send_async = true;
1174 RAMBlock *block = pss->block;
1175 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
1176 ram_addr_t current_addr = block->offset + offset;
1177
1178 p = block->host + offset;
1179 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
1180
1181 XBZRLE_cache_lock();
1182 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1183 migrate_use_xbzrle()) {
1184 pages = save_xbzrle_page(rs, &p, current_addr, block,
1185 offset, last_stage);
1186 if (!last_stage) {
1187 /* Can't send this cached data async, since the cache page
1188 * might get updated before it gets to the wire
1189 */
1190 send_async = false;
1191 }
1192 }
1193
1194 /* XBZRLE overflow or normal page */
1195 if (pages == -1) {
1196 pages = save_normal_page(rs, block, offset, p, send_async);
1197 }
1198
1199 XBZRLE_cache_unlock();
1200
1201 return pages;
1202 }
1203
1204 static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
1205 ram_addr_t offset)
1206 {
1207 if (multifd_queue_page(rs->f, block, offset) < 0) {
1208 return -1;
1209 }
1210 ram_counters.normal++;
1211
1212 return 1;
1213 }
1214
/*
 * Compress one target page onto @f using @stream (runs in a compression
 * thread).  Returns true if the page was a zero page (sent uncompressed
 * via the zero-page path), false otherwise — including the error case,
 * where the error is flagged on the migration file instead.
 */
static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
                                 ram_addr_t offset, uint8_t *source_buf)
{
    RAMState *rs = ram_state;
    uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
    bool zero_page = false;
    int ret;

    /* Zero pages bypass compression entirely */
    if (save_zero_page_to_file(rs, f, block, offset)) {
        zero_page = true;
        goto exit;
    }

    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);

    /*
     * copy it to an internal buffer to avoid it being modified by VM
     * so that we can catch up the error during compression and
     * decompression
     */
    memcpy(source_buf, p, TARGET_PAGE_SIZE);
    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
    if (ret < 0) {
        qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
        error_report("compressed data failed!");
        /* NOTE(review): error path skips ram_release_pages — confirm intended */
        return false;
    }

exit:
    ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
    return zero_page;
}
1247
/*
 * Account the output of one compression request: @bytes_xmit bytes went
 * onto the wire; classify the page as duplicate (zero) or compressed.
 */
static void
update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
{
    ram_counters.transferred += bytes_xmit;

    if (param->zero_page) {
        ram_counters.duplicate++;
        return;
    }

    /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
    compression_counters.compressed_size += bytes_xmit - 8;
    compression_counters.pages++;
}
1262
static bool save_page_use_compression(RAMState *rs);

/*
 * flush_compressed_data: wait until every compression thread has
 * finished its current request and push all pending compressed output
 * onto the migration stream.
 *
 * Used where ordering matters (e.g. before switching blocks or starting
 * a new round) so stale compressed pages cannot land after newer data.
 */
static void flush_compressed_data(RAMState *rs)
{
    int idx, len, thread_count;

    if (!save_page_use_compression(rs)) {
        return;
    }
    thread_count = migrate_compress_threads();

    /* Phase 1: wait for all workers to signal 'done' */
    qemu_mutex_lock(&comp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
        while (!comp_param[idx].done) {
            qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        }
    }
    qemu_mutex_unlock(&comp_done_lock);

    /* Phase 2: drain each worker's buffered output into rs->f */
    for (idx = 0; idx < thread_count; idx++) {
        qemu_mutex_lock(&comp_param[idx].mutex);
        if (!comp_param[idx].quit) {
            len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            /*
             * it's safe to fetch zero_page without holding comp_done_lock
             * as there is no further request submitted to the thread,
             * i.e, the thread should be waiting for a request at this point.
             */
            update_compress_thread_counts(&comp_param[idx], len);
        }
        qemu_mutex_unlock(&comp_param[idx].mutex);
    }
}
1296
/* Hand @block/@offset to a compression worker; caller holds param->mutex */
static inline void set_compress_params(CompressParam *param, RAMBlock *block,
                                       ram_addr_t offset)
{
    param->block = block;
    param->offset = offset;
}
1303
/*
 * compress_page_with_multi_thread: hand one page to an idle compression
 * thread, draining that thread's previous output first.
 *
 * Returns 1 if the page was dispatched, or -1 if no thread was free and
 * 'compress-wait-thread' is off (caller then sends it as a normal page).
 */
static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
                                           ram_addr_t offset)
{
    int idx, thread_count, bytes_xmit = -1, pages = -1;
    bool wait = migrate_compress_wait_thread();

    thread_count = migrate_compress_threads();
    qemu_mutex_lock(&comp_done_lock);
retry:
    for (idx = 0; idx < thread_count; idx++) {
        if (comp_param[idx].done) {
            comp_param[idx].done = false;
            /* Flush the worker's previous result before reusing it */
            bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
            qemu_mutex_lock(&comp_param[idx].mutex);
            set_compress_params(&comp_param[idx], block, offset);
            qemu_cond_signal(&comp_param[idx].cond);
            qemu_mutex_unlock(&comp_param[idx].mutex);
            pages = 1;
            update_compress_thread_counts(&comp_param[idx], bytes_xmit);
            break;
        }
    }

    /*
     * wait for the free thread if the user specifies 'compress-wait-thread',
     * otherwise we will post the page out in the main thread as normal page.
     */
    if (pages < 0 && wait) {
        qemu_cond_wait(&comp_done_cond, &comp_done_lock);
        goto retry;
    }
    qemu_mutex_unlock(&comp_done_lock);

    return pages;
}
1339
1340 /**
1341 * find_dirty_block: find the next dirty page and update any state
1342 * associated with the search process.
1343 *
1344 * Returns true if a page is found
1345 *
1346 * @rs: current RAM state
1347 * @pss: data about the state of the current dirty page scan
1348 * @again: set to false if the search has scanned the whole of RAM
1349 */
1350 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1351 {
1352 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1353 if (pss->complete_round && pss->block == rs->last_seen_block &&
1354 pss->page >= rs->last_page) {
1355 /*
1356 * We've been once around the RAM and haven't found anything.
1357 * Give up.
1358 */
1359 *again = false;
1360 return false;
1361 }
1362 if ((((ram_addr_t)pss->page) << TARGET_PAGE_BITS)
1363 >= pss->block->used_length) {
1364 /* Didn't find anything in this RAM Block */
1365 pss->page = 0;
1366 pss->block = QLIST_NEXT_RCU(pss->block, next);
1367 if (!pss->block) {
1368 /*
1369 * If memory migration starts over, we will meet a dirtied page
1370 * which may still exists in compression threads's ring, so we
1371 * should flush the compressed data to make sure the new page
1372 * is not overwritten by the old one in the destination.
1373 *
1374 * Also If xbzrle is on, stop using the data compression at this
1375 * point. In theory, xbzrle can do better than compression.
1376 */
1377 flush_compressed_data(rs);
1378
1379 /* Hit the end of the list */
1380 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1381 /* Flag that we've looped */
1382 pss->complete_round = true;
1383 rs->ram_bulk_stage = false;
1384 }
1385 /* Didn't find anything this time, but try again on the new block */
1386 *again = true;
1387 return false;
1388 } else {
1389 /* Can go around again, but... */
1390 *again = true;
1391 /* We've found something so probably don't need to */
1392 return true;
1393 }
1394 }
1395
1396 /**
1397 * unqueue_page: gets a page of the queue
1398 *
1399 * Helper for 'get_queued_page' - gets a page off the queue
1400 *
1401 * Returns the block of the page (or NULL if none available)
1402 *
1403 * @rs: current RAM state
1404 * @offset: used to return the offset within the RAMBlock
1405 */
1406 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1407 {
1408 RAMBlock *block = NULL;
1409
1410 if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) {
1411 return NULL;
1412 }
1413
1414 QEMU_LOCK_GUARD(&rs->src_page_req_mutex);
1415 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1416 struct RAMSrcPageRequest *entry =
1417 QSIMPLEQ_FIRST(&rs->src_page_requests);
1418 block = entry->rb;
1419 *offset = entry->offset;
1420
1421 if (entry->len > TARGET_PAGE_SIZE) {
1422 entry->len -= TARGET_PAGE_SIZE;
1423 entry->offset += TARGET_PAGE_SIZE;
1424 } else {
1425 memory_region_unref(block->mr);
1426 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1427 g_free(entry);
1428 migration_consume_urgent_request();
1429 }
1430 }
1431
1432 return block;
1433 }
1434
1435 /**
1436 * get_queued_page: unqueue a page from the postcopy requests
1437 *
1438 * Skips pages that are already sent (!dirty)
1439 *
1440 * Returns true if a queued page is found
1441 *
1442 * @rs: current RAM state
1443 * @pss: data about the state of the current dirty page scan
1444 */
1445 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1446 {
1447 RAMBlock *block;
1448 ram_addr_t offset;
1449 bool dirty;
1450
1451 do {
1452 block = unqueue_page(rs, &offset);
1453 /*
1454 * We're sending this page, and since it's postcopy nothing else
1455 * will dirty it, and we must make sure it doesn't get sent again
1456 * even if this queue request was received after the background
1457 * search already sent it.
1458 */
1459 if (block) {
1460 unsigned long page;
1461
1462 page = offset >> TARGET_PAGE_BITS;
1463 dirty = test_bit(page, block->bmap);
1464 if (!dirty) {
1465 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1466 page);
1467 } else {
1468 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1469 }
1470 }
1471
1472 } while (block && !dirty);
1473
1474 if (block) {
1475 /*
1476 * As soon as we start servicing pages out of order, then we have
1477 * to kill the bulk stage, since the bulk stage assumes
1478 * in (migration_bitmap_find_and_reset_dirty) that every page is
1479 * dirty, that's no longer true.
1480 */
1481 rs->ram_bulk_stage = false;
1482
1483 /*
1484 * We want the background search to continue from the queued page
1485 * since the guest is likely to want other pages near to the page
1486 * it just requested.
1487 */
1488 pss->block = block;
1489 pss->page = offset >> TARGET_PAGE_BITS;
1490
1491 /*
1492 * This unqueued page would break the "one round" check, even is
1493 * really rare.
1494 */
1495 pss->complete_round = false;
1496 }
1497
1498 return !!block;
1499 }
1500
1501 /**
1502 * migration_page_queue_free: drop any remaining pages in the ram
1503 * request queue
1504 *
1505 * It should be empty at the end anyway, but in error cases there may
1506 * be some left. in case that there is any page left, we drop it.
1507 *
1508 */
1509 static void migration_page_queue_free(RAMState *rs)
1510 {
1511 struct RAMSrcPageRequest *mspr, *next_mspr;
1512 /* This queue generally should be empty - but in the case of a failed
1513 * migration might have some droppings in.
1514 */
1515 RCU_READ_LOCK_GUARD();
1516 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1517 memory_region_unref(mspr->rb->mr);
1518 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1519 g_free(mspr);
1520 }
1521 }
1522
1523 /**
1524 * ram_save_queue_pages: queue the page for transmission
1525 *
1526 * A request from postcopy destination for example.
1527 *
1528 * Returns zero on success or negative on error
1529 *
1530 * @rbname: Name of the RAMBLock of the request. NULL means the
1531 * same that last one.
1532 * @start: starting address from the start of the RAMBlock
1533 * @len: length (in bytes) to send
1534 */
1535 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1536 {
1537 RAMBlock *ramblock;
1538 RAMState *rs = ram_state;
1539
1540 ram_counters.postcopy_requests++;
1541 RCU_READ_LOCK_GUARD();
1542
1543 if (!rbname) {
1544 /* Reuse last RAMBlock */
1545 ramblock = rs->last_req_rb;
1546
1547 if (!ramblock) {
1548 /*
1549 * Shouldn't happen, we can't reuse the last RAMBlock if
1550 * it's the 1st request.
1551 */
1552 error_report("ram_save_queue_pages no previous block");
1553 return -1;
1554 }
1555 } else {
1556 ramblock = qemu_ram_block_by_name(rbname);
1557
1558 if (!ramblock) {
1559 /* We shouldn't be asked for a non-existent RAMBlock */
1560 error_report("ram_save_queue_pages no block '%s'", rbname);
1561 return -1;
1562 }
1563 rs->last_req_rb = ramblock;
1564 }
1565 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1566 if (start+len > ramblock->used_length) {
1567 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1568 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1569 __func__, start, len, ramblock->used_length);
1570 return -1;
1571 }
1572
1573 struct RAMSrcPageRequest *new_entry =
1574 g_malloc0(sizeof(struct RAMSrcPageRequest));
1575 new_entry->rb = ramblock;
1576 new_entry->offset = start;
1577 new_entry->len = len;
1578
1579 memory_region_ref(ramblock->mr);
1580 qemu_mutex_lock(&rs->src_page_req_mutex);
1581 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1582 migration_make_urgent_request();
1583 qemu_mutex_unlock(&rs->src_page_req_mutex);
1584
1585 return 0;
1586 }
1587
1588 static bool save_page_use_compression(RAMState *rs)
1589 {
1590 if (!migrate_use_compression()) {
1591 return false;
1592 }
1593
1594 /*
1595 * If xbzrle is on, stop using the data compression after first
1596 * round of migration even if compression is enabled. In theory,
1597 * xbzrle can do better than compression.
1598 */
1599 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1600 return true;
1601 }
1602
1603 return false;
1604 }
1605
1606 /*
1607 * try to compress the page before posting it out, return true if the page
1608 * has been properly handled by compression, otherwise needs other
1609 * paths to handle it
1610 */
1611 static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1612 {
1613 if (!save_page_use_compression(rs)) {
1614 return false;
1615 }
1616
1617 /*
1618 * When starting the process of a new block, the first page of
1619 * the block should be sent out before other pages in the same
1620 * block, and all the pages in last block should have been sent
1621 * out, keeping this order is important, because the 'cont' flag
1622 * is used to avoid resending the block name.
1623 *
1624 * We post the fist page as normal page as compression will take
1625 * much CPU resource.
1626 */
1627 if (block != rs->last_sent_block) {
1628 flush_compressed_data(rs);
1629 return false;
1630 }
1631
1632 if (compress_page_with_multi_thread(rs, block, offset) > 0) {
1633 return true;
1634 }
1635
1636 compression_counters.busy++;
1637 return false;
1638 }
1639
1640 /**
1641 * ram_save_target_page: save one target page
1642 *
1643 * Returns the number of pages written
1644 *
1645 * @rs: current RAM state
1646 * @pss: data about the page we want to send
1647 * @last_stage: if we are at the completion stage
1648 */
1649 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1650 bool last_stage)
1651 {
1652 RAMBlock *block = pss->block;
1653 ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS;
1654 int res;
1655
1656 if (control_save_page(rs, block, offset, &res)) {
1657 return res;
1658 }
1659
1660 if (save_compress_page(rs, block, offset)) {
1661 return 1;
1662 }
1663
1664 res = save_zero_page(rs, block, offset);
1665 if (res > 0) {
1666 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
1667 * page would be stale
1668 */
1669 if (!save_page_use_compression(rs)) {
1670 XBZRLE_cache_lock();
1671 xbzrle_cache_zero_page(rs, block->offset + offset);
1672 XBZRLE_cache_unlock();
1673 }
1674 ram_release_pages(block->idstr, offset, res);
1675 return res;
1676 }
1677
1678 /*
1679 * Do not use multifd for:
1680 * 1. Compression as the first page in the new block should be posted out
1681 * before sending the compressed page
1682 * 2. In postcopy as one whole host page should be placed
1683 */
1684 if (!save_page_use_compression(rs) && migrate_use_multifd()
1685 && !migration_in_postcopy()) {
1686 return ram_save_multifd_page(rs, block, offset);
1687 }
1688
1689 return ram_save_page(rs, pss, last_stage);
1690 }
1691
1692 /**
1693 * ram_save_host_page: save a whole host page
1694 *
1695 * Starting at *offset send pages up to the end of the current host
1696 * page. It's valid for the initial offset to point into the middle of
1697 * a host page in which case the remainder of the hostpage is sent.
1698 * Only dirty target pages are sent. Note that the host page size may
1699 * be a huge page for this block.
1700 * The saving stops at the boundary of the used_length of the block
1701 * if the RAMBlock isn't a multiple of the host page size.
1702 *
1703 * Returns the number of pages written or negative on error
1704 *
1705 * @rs: current RAM state
1706 * @ms: current migration state
1707 * @pss: data about the page we want to send
1708 * @last_stage: if we are at the completion stage
1709 */
1710 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1711 bool last_stage)
1712 {
1713 int tmppages, pages = 0;
1714 size_t pagesize_bits =
1715 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1716
1717 if (ramblock_is_ignored(pss->block)) {
1718 error_report("block %s should not be migrated !", pss->block->idstr);
1719 return 0;
1720 }
1721
1722 do {
1723 /* Check the pages is dirty and if it is send it */
1724 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
1725 pss->page++;
1726 continue;
1727 }
1728
1729 tmppages = ram_save_target_page(rs, pss, last_stage);
1730 if (tmppages < 0) {
1731 return tmppages;
1732 }
1733
1734 pages += tmppages;
1735 pss->page++;
1736 /* Allow rate limiting to happen in the middle of huge pages */
1737 migration_rate_limit();
1738 } while ((pss->page & (pagesize_bits - 1)) &&
1739 offset_in_ramblock(pss->block,
1740 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
1741
1742 /* The offset we leave with is the last one we looked at */
1743 pss->page--;
1744 return pages;
1745 }
1746
1747 /**
1748 * ram_find_and_save_block: finds a dirty page and sends it to f
1749 *
1750 * Called within an RCU critical section.
1751 *
1752 * Returns the number of pages written where zero means no dirty pages,
1753 * or negative on error
1754 *
1755 * @rs: current RAM state
1756 * @last_stage: if we are at the completion stage
1757 *
1758 * On systems where host-page-size > target-page-size it will send all the
1759 * pages in a host page that are dirty.
1760 */
1761
1762 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1763 {
1764 PageSearchStatus pss;
1765 int pages = 0;
1766 bool again, found;
1767
1768 /* No dirty page as there is zero RAM */
1769 if (!ram_bytes_total()) {
1770 return pages;
1771 }
1772
1773 pss.block = rs->last_seen_block;
1774 pss.page = rs->last_page;
1775 pss.complete_round = false;
1776
1777 if (!pss.block) {
1778 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1779 }
1780
1781 do {
1782 again = true;
1783 found = get_queued_page(rs, &pss);
1784
1785 if (!found) {
1786 /* priority queue empty, so just search for something dirty */
1787 found = find_dirty_block(rs, &pss, &again);
1788 }
1789
1790 if (found) {
1791 pages = ram_save_host_page(rs, &pss, last_stage);
1792 }
1793 } while (!pages && again);
1794
1795 rs->last_seen_block = pss.block;
1796 rs->last_page = pss.page;
1797
1798 return pages;
1799 }
1800
1801 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1802 {
1803 uint64_t pages = size / TARGET_PAGE_SIZE;
1804
1805 if (zero) {
1806 ram_counters.duplicate += pages;
1807 } else {
1808 ram_counters.normal += pages;
1809 ram_counters.transferred += size;
1810 qemu_update_position(f, size);
1811 }
1812 }
1813
1814 static uint64_t ram_bytes_total_common(bool count_ignored)
1815 {
1816 RAMBlock *block;
1817 uint64_t total = 0;
1818
1819 RCU_READ_LOCK_GUARD();
1820
1821 if (count_ignored) {
1822 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1823 total += block->used_length;
1824 }
1825 } else {
1826 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
1827 total += block->used_length;
1828 }
1829 }
1830 return total;
1831 }
1832
/* Total RAM bytes to migrate, excluding blocks ignored for migration */
uint64_t ram_bytes_total(void)
{
    return ram_bytes_total_common(false);
}
1837
/* Allocate the buffer used to decode incoming xbzrle pages */
static void xbzrle_load_setup(void)
{
    XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
}
1842
/* Free the xbzrle decode buffer (g_free(NULL) is a no-op) */
static void xbzrle_load_cleanup(void)
{
    g_free(XBZRLE.decoded_buf);
    XBZRLE.decoded_buf = NULL;
}
1848
1849 static void ram_state_cleanup(RAMState **rsp)
1850 {
1851 if (*rsp) {
1852 migration_page_queue_free(*rsp);
1853 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1854 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1855 g_free(*rsp);
1856 *rsp = NULL;
1857 }
1858 }
1859
/* Release the xbzrle cache and its working buffers, under the cache lock */
static void xbzrle_cleanup(void)
{
    XBZRLE_cache_lock();
    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(XBZRLE.zero_target_page);
        /* NULL everything so a later setup/cleanup sees a clean state */
        XBZRLE.cache = NULL;
        XBZRLE.encoded_buf = NULL;
        XBZRLE.current_buf = NULL;
        XBZRLE.zero_target_page = NULL;
    }
    XBZRLE_cache_unlock();
}
1875
/*
 * Save-side teardown: stop dirty logging, drop per-block migration
 * bitmaps, and release xbzrle/compression/RAMState resources.
 */
static void ram_save_cleanup(void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    /* caller have hold iothread lock or is in a bh, so there is
     * no writing race against the migration bitmap
     */
    memory_global_dirty_log_stop();

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->clear_bmap);
        block->clear_bmap = NULL;
        g_free(block->bmap);
        block->bmap = NULL;
    }

    xbzrle_cleanup();
    compress_threads_save_cleanup();
    ram_state_cleanup(rsp);
}
1897
/* Reset the page-search state for a fresh pass over all RAM */
static void ram_state_reset(RAMState *rs)
{
    rs->last_seen_block = NULL;
    rs->last_sent_block = NULL;
    rs->last_page = 0;
    rs->last_version = ram_list.version;
    /* bulk stage: the first round treats every page as dirty */
    rs->ram_bulk_stage = true;
    rs->fpo_enabled = false;
}
1907
1908 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1909
1910 /*
1911 * 'expected' is the value you expect the bitmap mostly to be full
1912 * of; it won't bother printing lines that are all this value.
1913 * If 'todump' is null the migration bitmap is dumped.
1914 */
1915 void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1916 unsigned long pages)
1917 {
1918 int64_t cur;
1919 int64_t linelen = 128;
1920 char linebuf[129];
1921
1922 for (cur = 0; cur < pages; cur += linelen) {
1923 int64_t curb;
1924 bool found = false;
1925 /*
1926 * Last line; catch the case where the line length
1927 * is longer than remaining ram
1928 */
1929 if (cur + linelen > pages) {
1930 linelen = pages - cur;
1931 }
1932 for (curb = 0; curb < linelen; curb++) {
1933 bool thisbit = test_bit(cur + curb, todump);
1934 linebuf[curb] = thisbit ? '1' : '.';
1935 found = found || (thisbit != expected);
1936 }
1937 if (found) {
1938 linebuf[curb] = '\0';
1939 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1940 }
1941 }
1942 }
1943
1944 /* **** functions for postcopy ***** */
1945
/*
 * Release (discard) source RAM that has already been migrated: walks
 * each block's bitmap and discards every run of clear (= sent) pages.
 */
void ram_postcopy_migrated_memory_release(MigrationState *ms)
{
    struct RAMBlock *block;

    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        unsigned long *bitmap = block->bmap;
        unsigned long range = block->used_length >> TARGET_PAGE_BITS;
        unsigned long run_start = find_next_zero_bit(bitmap, range, 0);

        while (run_start < range) {
            /* run [run_start, run_end) is all clear (already sent) */
            unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
            ram_discard_range(block->idstr,
                              ((ram_addr_t)run_start) << TARGET_PAGE_BITS,
                              ((ram_addr_t)(run_end - run_start))
                                << TARGET_PAGE_BITS);
            run_start = find_next_zero_bit(bitmap, range, run_end + 1);
        }
    }
}
1965
1966 /**
1967 * postcopy_send_discard_bm_ram: discard a RAMBlock
1968 *
1969 * Returns zero on success
1970 *
1971 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1972 *
1973 * @ms: current migration state
1974 * @block: RAMBlock to discard
1975 */
1976 static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block)
1977 {
1978 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
1979 unsigned long current;
1980 unsigned long *bitmap = block->bmap;
1981
1982 for (current = 0; current < end; ) {
1983 unsigned long one = find_next_bit(bitmap, end, current);
1984 unsigned long zero, discard_length;
1985
1986 if (one >= end) {
1987 break;
1988 }
1989
1990 zero = find_next_zero_bit(bitmap, end, one + 1);
1991
1992 if (zero >= end) {
1993 discard_length = end - one;
1994 } else {
1995 discard_length = zero - one;
1996 }
1997 postcopy_discard_send_range(ms, one, discard_length);
1998 current = one + discard_length;
1999 }
2000
2001 return 0;
2002 }
2003
2004 /**
2005 * postcopy_each_ram_send_discard: discard all RAMBlocks
2006 *
2007 * Returns 0 for success or negative for error
2008 *
2009 * Utility for the outgoing postcopy code.
2010 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2011 * passing it bitmap indexes and name.
2012 * (qemu_ram_foreach_block ends up passing unscaled lengths
2013 * which would mean postcopy code would have to deal with target page)
2014 *
2015 * @ms: current migration state
2016 */
2017 static int postcopy_each_ram_send_discard(MigrationState *ms)
2018 {
2019 struct RAMBlock *block;
2020 int ret;
2021
2022 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2023 postcopy_discard_send_init(ms, block->idstr);
2024
2025 /*
2026 * Postcopy sends chunks of bitmap over the wire, but it
2027 * just needs indexes at this point, avoids it having
2028 * target page specific code.
2029 */
2030 ret = postcopy_send_discard_bm_ram(ms, block);
2031 postcopy_discard_send_finish(ms);
2032 if (ret) {
2033 return ret;
2034 }
2035 }
2036
2037 return 0;
2038 }
2039
2040 /**
2041 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
2042 *
2043 * Helper for postcopy_chunk_hostpages; it's called twice to
2044 * canonicalize the two bitmaps, that are similar, but one is
2045 * inverted.
2046 *
2047 * Postcopy requires that all target pages in a hostpage are dirty or
2048 * clean, not a mix. This function canonicalizes the bitmaps.
2049 *
2050 * @ms: current migration state
2051 * @block: block that contains the page we want to canonicalize
2052 */
2053 static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block)
2054 {
2055 RAMState *rs = ram_state;
2056 unsigned long *bitmap = block->bmap;
2057 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
2058 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2059 unsigned long run_start;
2060
2061 if (block->page_size == TARGET_PAGE_SIZE) {
2062 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2063 return;
2064 }
2065
2066 /* Find a dirty page */
2067 run_start = find_next_bit(bitmap, pages, 0);
2068
2069 while (run_start < pages) {
2070
2071 /*
2072 * If the start of this run of pages is in the middle of a host
2073 * page, then we need to fixup this host page.
2074 */
2075 if (QEMU_IS_ALIGNED(run_start, host_ratio)) {
2076 /* Find the end of this run */
2077 run_start = find_next_zero_bit(bitmap, pages, run_start + 1);
2078 /*
2079 * If the end isn't at the start of a host page, then the
2080 * run doesn't finish at the end of a host page
2081 * and we need to discard.
2082 */
2083 }
2084
2085 if (!QEMU_IS_ALIGNED(run_start, host_ratio)) {
2086 unsigned long page;
2087 unsigned long fixup_start_addr = QEMU_ALIGN_DOWN(run_start,
2088 host_ratio);
2089 run_start = QEMU_ALIGN_UP(run_start, host_ratio);
2090
2091 /* Clean up the bitmap */
2092 for (page = fixup_start_addr;
2093 page < fixup_start_addr + host_ratio; page++) {
2094 /*
2095 * Remark them as dirty, updating the count for any pages
2096 * that weren't previously dirty.
2097 */
2098 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
2099 }
2100 }
2101
2102 /* Find the next dirty page for the next iteration */
2103 run_start = find_next_bit(bitmap, pages, run_start);
2104 }
2105 }
2106
2107 /**
2108 * postcopy_chunk_hostpages: discard any partially sent host page
2109 *
2110 * Utility for the outgoing postcopy code.
2111 *
2112 * Discard any partially sent host-page size chunks, mark any partially
2113 * dirty host-page size chunks as all dirty. In this case the host-page
2114 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
2115 *
2116 * Returns zero on success
2117 *
2118 * @ms: current migration state
2119 * @block: block we want to work with
2120 */
2121 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
2122 {
2123 postcopy_discard_send_init(ms, block->idstr);
2124
2125 /*
2126 * Ensure that all partially dirty host pages are made fully dirty.
2127 */
2128 postcopy_chunk_hostpages_pass(ms, block);
2129
2130 postcopy_discard_send_finish(ms);
2131 return 0;
2132 }
2133
2134 /**
2135 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2136 *
2137 * Returns zero on success
2138 *
2139 * Transmit the set of pages to be discarded after precopy to the target
2140 * these are pages that:
2141 * a) Have been previously transmitted but are now dirty again
2142 * b) Pages that have never been transmitted, this ensures that
2143 * any pages on the destination that have been mapped by background
2144 * tasks get discarded (transparent huge pages is the specific concern)
2145 * Hopefully this is pretty sparse
2146 *
2147 * @ms: current migration state
2148 */
2149 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2150 {
2151 RAMState *rs = ram_state;
2152 RAMBlock *block;
2153 int ret;
2154
2155 RCU_READ_LOCK_GUARD();
2156
2157 /* This should be our last sync, the src is now paused */
2158 migration_bitmap_sync(rs);
2159
2160 /* Easiest way to make sure we don't resume in the middle of a host-page */
2161 rs->last_seen_block = NULL;
2162 rs->last_sent_block = NULL;
2163 rs->last_page = 0;
2164
2165 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2166 /* Deal with TPS != HPS and huge pages */
2167 ret = postcopy_chunk_hostpages(ms, block);
2168 if (ret) {
2169 return ret;
2170 }
2171
2172 #ifdef DEBUG_POSTCOPY
2173 ram_debug_dump_bitmap(block->bmap, true,
2174 block->used_length >> TARGET_PAGE_BITS);
2175 #endif
2176 }
2177 trace_ram_postcopy_send_discard_bitmap();
2178
2179 return postcopy_each_ram_send_discard(ms);
2180 }
2181
2182 /**
2183 * ram_discard_range: discard dirtied pages at the beginning of postcopy
2184 *
2185 * Returns zero on success
2186 *
2187 * @rbname: name of the RAMBlock of the request. NULL means the
2188 * same that last one.
2189 * @start: RAMBlock starting page
2190 * @length: RAMBlock size
2191 */
2192 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2193 {
2194 trace_ram_discard_range(rbname, start, length);
2195
2196 RCU_READ_LOCK_GUARD();
2197 RAMBlock *rb = qemu_ram_block_by_name(rbname);
2198
2199 if (!rb) {
2200 error_report("ram_discard_range: Failed to find block '%s'", rbname);
2201 return -1;
2202 }
2203
2204 /*
2205 * On source VM, we don't need to update the received bitmap since
2206 * we don't even have one.
2207 */
2208 if (rb->receivedmap) {
2209 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2210 length >> qemu_target_page_bits());
2211 }
2212
2213 return ram_block_discard_range(rb, start, length);
2214 }
2215
2216 /*
2217 * For every allocation, we will try not to crash the VM if the
2218 * allocation failed.
2219 */
2220 static int xbzrle_init(void)
2221 {
2222 Error *local_err = NULL;
2223
2224 if (!migrate_use_xbzrle()) {
2225 return 0;
2226 }
2227
2228 XBZRLE_cache_lock();
2229
2230 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2231 if (!XBZRLE.zero_target_page) {
2232 error_report("%s: Error allocating zero page", __func__);
2233 goto err_out;
2234 }
2235
2236 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2237 TARGET_PAGE_SIZE, &local_err);
2238 if (!XBZRLE.cache) {
2239 error_report_err(local_err);
2240 goto free_zero_page;
2241 }
2242
2243 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2244 if (!XBZRLE.encoded_buf) {
2245 error_report("%s: Error allocating encoded_buf", __func__);
2246 goto free_cache;
2247 }
2248
2249 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2250 if (!XBZRLE.current_buf) {
2251 error_report("%s: Error allocating current_buf", __func__);
2252 goto free_encoded_buf;
2253 }
2254
2255 /* We are all good */
2256 XBZRLE_cache_unlock();
2257 return 0;
2258
2259 free_encoded_buf:
2260 g_free(XBZRLE.encoded_buf);
2261 XBZRLE.encoded_buf = NULL;
2262 free_cache:
2263 cache_fini(XBZRLE.cache);
2264 XBZRLE.cache = NULL;
2265 free_zero_page:
2266 g_free(XBZRLE.zero_target_page);
2267 XBZRLE.zero_target_page = NULL;
2268 err_out:
2269 XBZRLE_cache_unlock();
2270 return -ENOMEM;
2271 }
2272
2273 static int ram_state_init(RAMState **rsp)
2274 {
2275 *rsp = g_try_new0(RAMState, 1);
2276
2277 if (!*rsp) {
2278 error_report("%s: Init ramstate fail", __func__);
2279 return -1;
2280 }
2281
2282 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2283 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2284 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2285
2286 /*
2287 * Count the total number of pages used by ram blocks not including any
2288 * gaps due to alignment or unplugs.
2289 * This must match with the initial values of dirty bitmap.
2290 */
2291 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2292 ram_state_reset(*rsp);
2293
2294 return 0;
2295 }
2296
2297 static void ram_list_init_bitmaps(void)
2298 {
2299 MigrationState *ms = migrate_get_current();
2300 RAMBlock *block;
2301 unsigned long pages;
2302 uint8_t shift;
2303
2304 /* Skip setting bitmap if there is no RAM */
2305 if (ram_bytes_total()) {
2306 shift = ms->clear_bitmap_shift;
2307 if (shift > CLEAR_BITMAP_SHIFT_MAX) {
2308 error_report("clear_bitmap_shift (%u) too big, using "
2309 "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
2310 shift = CLEAR_BITMAP_SHIFT_MAX;
2311 } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
2312 error_report("clear_bitmap_shift (%u) too small, using "
2313 "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
2314 shift = CLEAR_BITMAP_SHIFT_MIN;
2315 }
2316
2317 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2318 pages = block->max_length >> TARGET_PAGE_BITS;
2319 /*
2320 * The initial dirty bitmap for migration must be set with all
2321 * ones to make sure we'll migrate every guest RAM page to
2322 * destination.
2323 * Here we set RAMBlock.bmap all to 1 because when rebegin a
2324 * new migration after a failed migration, ram_list.
2325 * dirty_memory[DIRTY_MEMORY_MIGRATION] don't include the whole
2326 * guest memory.
2327 */
2328 block->bmap = bitmap_new(pages);
2329 bitmap_set(block->bmap, 0, pages);
2330 block->clear_bmap_shift = shift;
2331 block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
2332 }
2333 }
2334 }
2335
/*
 * Set up dirty bitmaps, enable dirty logging, and do the first precopy
 * bitmap sync. Takes the BQL and ramlist lock for the duration.
 */
static void ram_init_bitmaps(RAMState *rs)
{
    /* For memory_global_dirty_log_start below. */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    WITH_RCU_READ_LOCK_GUARD() {
        /* Allocate bitmaps before logging starts so no dirt is lost */
        ram_list_init_bitmaps();
        memory_global_dirty_log_start();
        migration_bitmap_sync_precopy(rs);
    }
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
2350
2351 static int ram_init_all(RAMState **rsp)
2352 {
2353 if (ram_state_init(rsp)) {
2354 return -1;
2355 }
2356
2357 if (xbzrle_init()) {
2358 ram_state_cleanup(rsp);
2359 return -1;
2360 }
2361
2362 ram_init_bitmaps(*rsp);
2363
2364 return 0;
2365 }
2366
2367 static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2368 {
2369 RAMBlock *block;
2370 uint64_t pages = 0;
2371
2372 /*
2373 * Postcopy is not using xbzrle/compression, so no need for that.
2374 * Also, since source are already halted, we don't need to care
2375 * about dirty page logging as well.
2376 */
2377
2378 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
2379 pages += bitmap_count_one(block->bmap,
2380 block->used_length >> TARGET_PAGE_BITS);
2381 }
2382
2383 /* This may not be aligned with current bitmaps. Recalculate. */
2384 rs->migration_dirty_pages = pages;
2385
2386 rs->last_seen_block = NULL;
2387 rs->last_sent_block = NULL;
2388 rs->last_page = 0;
2389 rs->last_version = ram_list.version;
2390 /*
2391 * Disable the bulk stage, otherwise we'll resend the whole RAM no
2392 * matter what we have sent.
2393 */
2394 rs->ram_bulk_stage = false;
2395
2396 /* Update RAMState cache of output QEMUFile */
2397 rs->f = out;
2398
2399 trace_ram_state_resume_prepare(pages);
2400 }
2401
/*
 * This function clears bits of the free pages reported by the caller from the
 * migration dirty bitmap. @addr is the host address corresponding to the
 * start of the continuous guest free pages, and @len is the total bytes of
 * those pages.
 *
 * NOTE(review): assumes @addr and @len are target-page aligned — confirm
 * with the caller (e.g. virtio-balloon free page hinting); an unaligned
 * tail would be silently truncated by the >> TARGET_PAGE_BITS shifts.
 */
void qemu_guest_free_page_hint(void *addr, size_t len)
{
    RAMBlock *block;
    ram_addr_t offset;
    size_t used_len, start, npages;
    MigrationState *s = migrate_get_current();

    /* This function is currently expected to be used during live migration */
    if (!migration_is_setup_or_active(s->state)) {
        return;
    }

    /* Hints may span multiple RAMBlocks; process one block per iteration */
    for (; len > 0; len -= used_len, addr += used_len) {
        block = qemu_ram_block_from_host(addr, false, &offset);
        if (unlikely(!block || offset >= block->used_length)) {
            /*
             * The implementation might not support RAMBlock resize during
             * live migration, but it could happen in theory with future
             * updates. So we add a check here to capture that case.
             */
            error_report_once("%s unexpected error", __func__);
            return;
        }

        /* Clamp this chunk to the remainder of the current block */
        if (len <= block->used_length - offset) {
            used_len = len;
        } else {
            used_len = block->used_length - offset;
        }

        start = offset >> TARGET_PAGE_BITS;
        npages = used_len >> TARGET_PAGE_BITS;

        /* Keep the dirty-page counter consistent with the bitmap */
        qemu_mutex_lock(&ram_state->bitmap_mutex);
        ram_state->migration_dirty_pages -=
                      bitmap_count_one_with_offset(block->bmap, start, npages);
        bitmap_clear(block->bmap, start, npages);
        qemu_mutex_unlock(&ram_state->bitmap_mutex);
    }
}
2448
2449 /*
2450 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2451 * long-running RCU critical section. When rcu-reclaims in the code
2452 * start to become numerous it will be necessary to reduce the
2453 * granularity of these critical sections.
2454 */
2455
/**
 * ram_save_setup: Setup RAM for migration
 *
 * Writes the stream header: total RAM size plus, per migratable block,
 * its id, used length and (when relevant) page size / address.
 *
 * Returns zero to indicate success and negative for error
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMState **rsp = opaque;
    RAMBlock *block;

    if (compress_threads_save_setup()) {
        return -1;
    }

    /* migration has already setup the bitmap, reuse it. */
    if (!migration_in_colo_state()) {
        if (ram_init_all(rsp) != 0) {
            compress_threads_save_cleanup();
            return -1;
        }
    }
    (*rsp)->f = f;

    WITH_RCU_READ_LOCK_GUARD() {
        /* Total RAM size, tagged so the destination recognizes the record */
        qemu_put_be64(f, ram_bytes_total_common(true) | RAM_SAVE_FLAG_MEM_SIZE);

        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            /* Length-prefixed block id, then the block's used length */
            qemu_put_byte(f, strlen(block->idstr));
            qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
            qemu_put_be64(f, block->used_length);
            /* Postcopy needs the destination to know huge-page sizes */
            if (migrate_postcopy_ram() && block->page_size !=
                                          qemu_host_page_size) {
                qemu_put_be64(f, block->page_size);
            }
            if (migrate_ignore_shared()) {
                qemu_put_be64(f, block->mr->addr);
            }
        }
    }

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    multifd_send_sync_main(f);
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);

    return 0;
}
2508
/**
 * ram_save_iterate: iterative stage for migration
 *
 * Sends dirty pages until the rate limit is hit (and no postcopy page
 * requests are pending), the stream errors, or no dirty pages remain.
 *
 * Returns zero to indicate success and negative for error; a positive
 * return (done == 1) tells the caller no pages were left this round.
 *
 * @f: QEMUFile where to send the data
 * @opaque: RAMState pointer
 */
static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    RAMState **temp = opaque;
    RAMState *rs = *temp;
    int ret = 0;
    int i;
    int64_t t0;
    int done = 0;

    if (blk_mig_bulk_active()) {
        /* Avoid transferring ram during bulk phase of block migration as
         * the bulk phase will usually take a long time and transferring
         * ram updates during that time is pointless. */
        goto out;
    }

    WITH_RCU_READ_LOCK_GUARD() {
        /* RAMBlock hotplug/unplug invalidates the search state */
        if (ram_list.version != rs->last_version) {
            ram_state_reset(rs);
        }

        /* Read version before ram_list.blocks */
        smp_rmb();

        ram_control_before_iterate(f, RAM_CONTROL_ROUND);

        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        i = 0;
        /* Keep sending while under the rate limit, or while the
         * destination has outstanding postcopy page requests. */
        while ((ret = qemu_file_rate_limit(f)) == 0 ||
               !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
            int pages;

            if (qemu_file_get_error(f)) {
                break;
            }

            pages = ram_find_and_save_block(rs, false);
            /* no more pages to sent */
            if (pages == 0) {
                done = 1;
                break;
            }

            if (pages < 0) {
                qemu_file_set_error(f, pages);
                break;
            }

            rs->target_page_count += pages;

            /*
             * During postcopy, it is necessary to make sure one whole host
             * page is sent in one chunk.
             */
            if (migrate_postcopy_ram()) {
                flush_compressed_data(rs);
            }

            /*
             * we want to check in the 1st loop, just in case it was the 1st
             * time and we had to sync the dirty bitmap.
             * qemu_clock_get_ns() is a bit expensive, so we only check each
             * some iterations
             */
            if ((i & 63) == 0) {
                uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) /
                              1000000;
                if (t1 > MAX_WAIT) {
                    trace_ram_save_iterate_big_wait(t1, i);
                    break;
                }
            }
            i++;
        }
    }

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

out:
    if (ret >= 0
        && migration_is_setup_or_active(migrate_get_current()->state)) {
        multifd_send_sync_main(rs->f);
        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
        qemu_fflush(f);
        /* Account for the 8-byte EOS marker just written */
        ram_counters.transferred += 8;

        ret = qemu_file_get_error(f);
    }
    if (ret < 0) {
        return ret;
    }

    return done;
}
2615
2616 /**
2617 * ram_save_complete: function called to send the remaining amount of ram
2618 *
2619 * Returns zero to indicate success or negative on error
2620 *
2621 * Called with iothread lock
2622 *
2623 * @f: QEMUFile where to send the data
2624 * @opaque: RAMState pointer
2625 */
2626 static int ram_save_complete(QEMUFile *f, void *opaque)
2627 {
2628 RAMState **temp = opaque;
2629 RAMState *rs = *temp;
2630 int ret = 0;
2631
2632 WITH_RCU_READ_LOCK_GUARD() {
2633 if (!migration_in_postcopy()) {
2634 migration_bitmap_sync_precopy(rs);
2635 }
2636
2637 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2638
2639 /* try transferring iterative blocks of memory */
2640
2641 /* flush all remaining blocks regardless of rate limiting */
2642 while (true) {
2643 int pages;
2644
2645 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2646 /* no more blocks to sent */
2647 if (pages == 0) {
2648 break;
2649 }
2650 if (pages < 0) {
2651 ret = pages;
2652 break;
2653 }
2654 }
2655
2656 flush_compressed_data(rs);
2657 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2658 }
2659
2660 if (ret >= 0) {
2661 multifd_send_sync_main(rs->f);
2662 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2663 qemu_fflush(f);
2664 }
2665
2666 return ret;
2667 }
2668
2669 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2670 uint64_t *res_precopy_only,
2671 uint64_t *res_compatible,
2672 uint64_t *res_postcopy_only)
2673 {
2674 RAMState **temp = opaque;
2675 RAMState *rs = *temp;
2676 uint64_t remaining_size;
2677
2678 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2679
2680 if (!migration_in_postcopy() &&
2681 remaining_size < max_size) {
2682 qemu_mutex_lock_iothread();
2683 WITH_RCU_READ_LOCK_GUARD() {
2684 migration_bitmap_sync_precopy(rs);
2685 }
2686 qemu_mutex_unlock_iothread();
2687 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2688 }
2689
2690 if (migrate_postcopy_ram()) {
2691 /* We can do postcopy, and all the data is postcopiable */
2692 *res_compatible += remaining_size;
2693 } else {
2694 *res_precopy_only += remaining_size;
2695 }
2696 }
2697
2698 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2699 {
2700 unsigned int xh_len;
2701 int xh_flags;
2702 uint8_t *loaded_data;
2703
2704 /* extract RLE header */
2705 xh_flags = qemu_get_byte(f);
2706 xh_len = qemu_get_be16(f);
2707
2708 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2709 error_report("Failed to load XBZRLE page - wrong compression!");
2710 return -1;
2711 }
2712
2713 if (xh_len > TARGET_PAGE_SIZE) {
2714 error_report("Failed to load XBZRLE page - len overflow!");
2715 return -1;
2716 }
2717 loaded_data = XBZRLE.decoded_buf;
2718 /* load data and decode */
2719 /* it can change loaded_data to point to an internal buffer */
2720 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2721
2722 /* decode RLE */
2723 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2724 TARGET_PAGE_SIZE) == -1) {
2725 error_report("Failed to load XBZRLE page - decode error!");
2726 return -1;
2727 }
2728
2729 return 0;
2730 }
2731
2732 /**
2733 * ram_block_from_stream: read a RAMBlock id from the migration stream
2734 *
2735 * Must be called from within a rcu critical section.
2736 *
2737 * Returns a pointer from within the RCU-protected ram_list.
2738 *
2739 * @f: QEMUFile where to read the data from
2740 * @flags: Page flags (mostly to see if it's a continuation of previous block)
2741 */
2742 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2743 {
2744 static RAMBlock *block = NULL;
2745 char id[256];
2746 uint8_t len;
2747
2748 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2749 if (!block) {
2750 error_report("Ack, bad migration stream!");
2751 return NULL;
2752 }
2753 return block;
2754 }
2755
2756 len = qemu_get_byte(f);
2757 qemu_get_buffer(f, (uint8_t *)id, len);
2758 id[len] = 0;
2759
2760 block = qemu_ram_block_by_name(id);
2761 if (!block) {
2762 error_report("Can't find block %s", id);
2763 return NULL;
2764 }
2765
2766 if (ramblock_is_ignored(block)) {
2767 error_report("block %s should not be migrated !", id);
2768 return NULL;
2769 }
2770
2771 return block;
2772 }
2773
2774 static inline void *host_from_ram_block_offset(RAMBlock *block,
2775 ram_addr_t offset)
2776 {
2777 if (!offset_in_ramblock(block, offset)) {
2778 return NULL;
2779 }
2780
2781 return block->host + offset;
2782 }
2783
2784 static inline void *colo_cache_from_block_offset(RAMBlock *block,
2785 ram_addr_t offset, bool record_bitmap)
2786 {
2787 if (!offset_in_ramblock(block, offset)) {
2788 return NULL;
2789 }
2790 if (!block->colo_cache) {
2791 error_report("%s: colo_cache is NULL in block :%s",
2792 __func__, block->idstr);
2793 return NULL;
2794 }
2795
2796 /*
2797 * During colo checkpoint, we need bitmap of these migrated pages.
2798 * It help us to decide which pages in ram cache should be flushed
2799 * into VM's RAM later.
2800 */
2801 if (record_bitmap &&
2802 !test_and_set_bit(offset >> TARGET_PAGE_BITS, block->bmap)) {
2803 ram_state->migration_dirty_pages++;
2804 }
2805 return block->colo_cache + offset;
2806 }
2807
/**
 * ram_handle_compressed: handle the zero page case
 *
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from. We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* Destination already all zeroes: skip the write, keep it clean */
        return;
    }
    memset(host, ch, size);
}
2824
2825 /* return the size after decompression, or negative value on error */
2826 static int
2827 qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
2828 const uint8_t *source, size_t source_len)
2829 {
2830 int err;
2831
2832 err = inflateReset(stream);
2833 if (err != Z_OK) {
2834 return -1;
2835 }
2836
2837 stream->avail_in = source_len;
2838 stream->next_in = (uint8_t *)source;
2839 stream->avail_out = dest_len;
2840 stream->next_out = dest;
2841
2842 err = inflate(stream, Z_NO_FLUSH);
2843 if (err != Z_STREAM_END) {
2844 return -1;
2845 }
2846
2847 return stream->total_out;
2848 }
2849
/*
 * Decompress worker thread body. Waits on its DecompressParam for work
 * (param->des set), inflates the page, and signals completion via the
 * shared decomp_done_cond. Exits when param->quit is set.
 */
static void *do_data_decompress(void *opaque)
{
    DecompressParam *param = opaque;
    unsigned long pagesize;
    uint8_t *des;
    int len, ret;

    qemu_mutex_lock(&param->mutex);
    while (!param->quit) {
        if (param->des) {
            /* Snapshot the request, then drop the lock while inflating */
            des = param->des;
            len = param->len;
            param->des = 0;
            qemu_mutex_unlock(&param->mutex);

            pagesize = TARGET_PAGE_SIZE;

            ret = qemu_uncompress_data(&param->stream, des, pagesize,
                                       param->compbuf, len);
            if (ret < 0 && migrate_get_current()->decompress_error_check) {
                error_report("decompress data failed");
                qemu_file_set_error(decomp_file, ret);
            }

            /* Mark this slot idle and wake anyone in wait_for_decompress_done */
            qemu_mutex_lock(&decomp_done_lock);
            param->done = true;
            qemu_cond_signal(&decomp_done_cond);
            qemu_mutex_unlock(&decomp_done_lock);

            qemu_mutex_lock(&param->mutex);
        } else {
            /* No work queued: sleep until signalled */
            qemu_cond_wait(&param->cond, &param->mutex);
        }
    }
    qemu_mutex_unlock(&param->mutex);

    return NULL;
}
2888
2889 static int wait_for_decompress_done(void)
2890 {
2891 int idx, thread_count;
2892
2893 if (!migrate_use_compression()) {
2894 return 0;
2895 }
2896
2897 thread_count = migrate_decompress_threads();
2898 qemu_mutex_lock(&decomp_done_lock);
2899 for (idx = 0; idx < thread_count; idx++) {
2900 while (!decomp_param[idx].done) {
2901 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2902 }
2903 }
2904 qemu_mutex_unlock(&decomp_done_lock);
2905 return qemu_file_get_error(decomp_file);
2906 }
2907
/*
 * Tear down the decompression workers. Two passes: first ask every
 * initialized thread to quit, then join and free each one. Splitting the
 * passes lets all threads wind down in parallel instead of serially.
 */
static void compress_threads_load_cleanup(void)
{
    int i, thread_count;

    if (!migrate_use_compression()) {
        return;
    }
    thread_count = migrate_decompress_threads();
    for (i = 0; i < thread_count; i++) {
        /*
         * we use it as a indicator which shows if the thread is
         * properly init'd or not
         */
        if (!decomp_param[i].compbuf) {
            break;
        }

        qemu_mutex_lock(&decomp_param[i].mutex);
        decomp_param[i].quit = true;
        /* Wake the worker so it sees quit and exits its loop */
        qemu_cond_signal(&decomp_param[i].cond);
        qemu_mutex_unlock(&decomp_param[i].mutex);
    }
    for (i = 0; i < thread_count; i++) {
        if (!decomp_param[i].compbuf) {
            break;
        }

        /* Join before destroying anything the thread may still touch */
        qemu_thread_join(decompress_threads + i);
        qemu_mutex_destroy(&decomp_param[i].mutex);
        qemu_cond_destroy(&decomp_param[i].cond);
        inflateEnd(&decomp_param[i].stream);
        g_free(decomp_param[i].compbuf);
        decomp_param[i].compbuf = NULL;
    }
    g_free(decompress_threads);
    g_free(decomp_param);
    decompress_threads = NULL;
    decomp_param = NULL;
    decomp_file = NULL;
}
2948
2949 static int compress_threads_load_setup(QEMUFile *f)
2950 {
2951 int i, thread_count;
2952
2953 if (!migrate_use_compression()) {
2954 return 0;
2955 }
2956
2957 thread_count = migrate_decompress_threads();
2958 decompress_threads = g_new0(QemuThread, thread_count);
2959 decomp_param = g_new0(DecompressParam, thread_count);
2960 qemu_mutex_init(&decomp_done_lock);
2961 qemu_cond_init(&decomp_done_cond);
2962 decomp_file = f;
2963 for (i = 0; i < thread_count; i++) {
2964 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
2965 goto exit;
2966 }
2967
2968 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2969 qemu_mutex_init(&decomp_param[i].mutex);
2970 qemu_cond_init(&decomp_param[i].cond);
2971 decomp_param[i].done = true;
2972 decomp_param[i].quit = false;
2973 qemu_thread_create(decompress_threads + i, "decompress",
2974 do_data_decompress, decomp_param + i,
2975 QEMU_THREAD_JOINABLE);
2976 }
2977 return 0;
2978 exit:
2979 compress_threads_load_cleanup();
2980 return -1;
2981 }
2982
2983 static void decompress_data_with_multi_threads(QEMUFile *f,
2984 void *host, int len)
2985 {
2986 int idx, thread_count;
2987
2988 thread_count = migrate_decompress_threads();
2989 qemu_mutex_lock(&decomp_done_lock);
2990 while (true) {
2991 for (idx = 0; idx < thread_count; idx++) {
2992 if (decomp_param[idx].done) {
2993 decomp_param[idx].done = false;
2994 qemu_mutex_lock(&decomp_param[idx].mutex);
2995 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2996 decomp_param[idx].des = host;
2997 decomp_param[idx].len = len;
2998 qemu_cond_signal(&decomp_param[idx].cond);
2999 qemu_mutex_unlock(&decomp_param[idx].mutex);
3000 break;
3001 }
3002 }
3003 if (idx < thread_count) {
3004 break;
3005 } else {
3006 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3007 }
3008 }
3009 qemu_mutex_unlock(&decomp_done_lock);
3010 }
3011
/*
 * colo cache: this is for secondary VM, we cache the whole
 * memory of the secondary VM, it is need to hold the global lock
 * to call this helper.
 *
 * Returns 0 on success, -errno if the cache allocation fails (any
 * caches already allocated are freed again).
 */
int colo_init_ram_cache(void)
{
    RAMBlock *block;

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            block->colo_cache = qemu_anon_ram_alloc(block->used_length,
                                                    NULL,
                                                    false);
            if (!block->colo_cache) {
                error_report("%s: Can't alloc memory for COLO cache of block %s,"
                             "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
                             block->used_length);
                /*
                 * Failure: walk all blocks again and release whatever
                 * caches were allocated before this one failed.
                 * (Reusing 'block' here is fine; we return right after.)
                 */
                RAMBLOCK_FOREACH_NOT_IGNORED(block) {
                    if (block->colo_cache) {
                        qemu_anon_ram_free(block->colo_cache, block->used_length);
                        block->colo_cache = NULL;
                    }
                }
                return -errno;
            }
        }
    }

    /*
    * Record the dirty pages that sent by PVM, we use this dirty bitmap together
    * with to decide which page in cache should be flushed into SVM's RAM. Here
    * we use the same name 'ram_bitmap' as for migration.
    */
    if (ram_bytes_total()) {
        RAMBlock *block;

        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
            block->bmap = bitmap_new(pages);
        }
    }

    ram_state_init(&ram_state);
    return 0;
}
3058
/*
 * Start dirty logging on the COLO secondary so later checkpoints can
 * tell which pages changed. TODO: duplicated with ram_init_bitmaps.
 */
void colo_incoming_start_dirty_log(void)
{
    RAMBlock *block = NULL;
    /* For memory_global_dirty_log_start below. */
    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();

    memory_global_dirty_log_sync();
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            /* Pull in whatever was dirtied so far... */
            ramblock_sync_dirty_bitmap(ram_state, block);
            /* Discard this dirty bitmap record */
            bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
        }
        memory_global_dirty_log_start();
    }
    /* Start counting from a clean slate */
    ram_state->migration_dirty_pages = 0;
    qemu_mutex_unlock_ramlist();
    qemu_mutex_unlock_iothread();
}
3080
/*
 * Release everything colo_init_ram_cache() set up: dirty logging, the
 * per-block bitmaps, the COLO caches, and the RAMState.
 * It is need to hold the global lock to call this helper.
 */
void colo_release_ram_cache(void)
{
    RAMBlock *block;

    memory_global_dirty_log_stop();
    RAMBLOCK_FOREACH_NOT_IGNORED(block) {
        g_free(block->bmap);
        block->bmap = NULL;
    }

    /* Free the cache copies under RCU, matching the allocation path */
    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_NOT_IGNORED(block) {
            if (block->colo_cache) {
                qemu_anon_ram_free(block->colo_cache, block->used_length);
                block->colo_cache = NULL;
            }
        }
    }
    ram_state_cleanup(&ram_state);
}
3102
3103 /**
3104 * ram_load_setup: Setup RAM for migration incoming side
3105 *
3106 * Returns zero to indicate success and negative for error
3107 *
3108 * @f: QEMUFile where to receive the data
3109 * @opaque: RAMState pointer
3110 */
3111 static int ram_load_setup(QEMUFile *f, void *opaque)
3112 {
3113 if (compress_threads_load_setup(f)) {
3114 return -1;
3115 }
3116
3117 xbzrle_load_setup();
3118 ramblock_recv_map_init();
3119
3120 return 0;
3121 }
3122
3123 static int ram_load_cleanup(void *opaque)
3124 {
3125 RAMBlock *rb;
3126
3127 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3128 qemu_ram_block_writeback(rb);
3129 }
3130
3131 xbzrle_load_cleanup();
3132 compress_threads_load_cleanup();
3133
3134 RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
3135 g_free(rb->receivedmap);
3136 rb->receivedmap = NULL;
3137 }
3138
3139 return 0;
3140 }
3141
/**
 * ram_postcopy_incoming_init: allocate postcopy data structures
 *
 * Returns 0 for success and negative if there was one error
 *
 * @mis: current migration incoming state
 *
 * Allocate data structures etc needed by incoming migration with
 * postcopy-ram. postcopy-ram's similarly names
 * postcopy_ram_incoming_init does the work.
 */
int ram_postcopy_incoming_init(MigrationIncomingState *mis)
{
    /* Thin wrapper; all the work lives in postcopy-ram.c */
    return postcopy_ram_incoming_init(mis);
}
3157
3158 /**
3159 * ram_load_postcopy: load a page in postcopy case
3160 *
3161 * Returns 0 for success or -errno in case of error
3162 *
3163 * Called in postcopy mode by ram_load().
3164 * rcu_read_lock is taken prior to this being called.
3165 *
3166 * @f: QEMUFile where to send the data
3167 */
3168 static int ram_load_postcopy(QEMUFile *f)
3169 {
3170 int flags = 0, ret = 0;
3171 bool place_needed = false;
3172 bool matches_target_page_size = false;
3173 MigrationIncomingState *mis = migration_incoming_get_current();
3174 /* Temporary page that is later 'placed' */
3175 void *postcopy_host_page = mis->postcopy_tmp_page;
3176 void *this_host = NULL;
3177 bool all_zero = true;
3178 int target_pages = 0;
3179
3180 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3181 ram_addr_t addr;
3182 void *host = NULL;
3183 void *page_buffer = NULL;
3184 void *place_source = NULL;
3185 RAMBlock *block = NULL;
3186 uint8_t ch;
3187 int len;
3188
3189 addr = qemu_get_be64(f);
3190
3191 /*
3192 * If qemu file error, we should stop here, and then "addr"
3193 * may be invalid
3194 */
3195 ret = qemu_file_get_error(f);
3196 if (ret) {
3197 break;
3198 }
3199
3200 flags = addr & ~TARGET_PAGE_MASK;
3201 addr &= TARGET_PAGE_MASK;
3202
3203 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3204 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3205 RAM_SAVE_FLAG_COMPRESS_PAGE)) {
3206 block = ram_block_from_stream(f, flags);
3207
3208 host = host_from_ram_block_offset(block, addr);
3209 if (!host) {
3210 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3211 ret = -EINVAL;
3212 break;
3213 }
3214 target_pages++;
3215 matches_target_page_size = block->page_size == TARGET_PAGE_SIZE;
3216 /*
3217 * Postcopy requires that we place whole host pages atomically;
3218 * these may be huge pages for RAMBlocks that are backed by
3219 * hugetlbfs.
3220 * To make it atomic, the data is read into a temporary page
3221 * that's moved into place later.
3222 * The migration protocol uses, possibly smaller, target-pages
3223 * however the source ensures it always sends all the components
3224 * of a host page in one chunk.
3225 */
3226 page_buffer = postcopy_host_page +
3227 ((uintptr_t)host & (block->page_size - 1));
3228 if (target_pages == 1) {
3229 this_host = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
3230 block->page_size);
3231 } else {
3232 /* not the 1st TP within the HP */
3233 if (QEMU_ALIGN_DOWN((uintptr_t)host, block->page_size) !=
3234 (uintptr_t)this_host) {
3235 error_report("Non-same host page %p/%p",
3236 host, this_host);
3237 ret = -EINVAL;
3238 break;
3239 }
3240 }
3241
3242 /*
3243 * If it's the last part of a host page then we place the host
3244 * page
3245 */
3246 if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) {
3247 place_needed = true;
3248 }
3249 place_source = postcopy_host_page;
3250 }
3251
3252 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3253 case RAM_SAVE_FLAG_ZERO:
3254 ch = qemu_get_byte(f);
3255 /*
3256 * Can skip to set page_buffer when
3257 * this is a zero page and (block->page_size == TARGET_PAGE_SIZE).
3258 */
3259 if (ch || !matches_target_page_size) {
3260 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3261 }
3262 if (ch) {
3263 all_zero = false;
3264 }
3265 break;
3266
3267 case RAM_SAVE_FLAG_PAGE:
3268 all_zero = false;
3269 if (!matches_target_page_size) {
3270 /* For huge pages, we always use temporary buffer */
3271 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3272 } else {
3273 /*
3274 * For small pages that matches target page size, we
3275 * avoid the qemu_file copy. Instead we directly use
3276 * the buffer of QEMUFile to place the page. Note: we
3277 * cannot do any QEMUFile operation before using that
3278 * buffer to make sure the buffer is valid when
3279 * placing the page.
3280 */
3281 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3282 TARGET_PAGE_SIZE);
3283 }
3284 break;
3285 case RAM_SAVE_FLAG_COMPRESS_PAGE:
3286 all_zero = false;
3287 len = qemu_get_be32(f);
3288 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3289 error_report("Invalid compressed data length: %d", len);
3290 ret = -EINVAL;
3291 break;
3292 }
3293 decompress_data_with_multi_threads(f, page_buffer, len);
3294 break;
3295
3296 case RAM_SAVE_FLAG_EOS:
3297 /* normal exit */
3298 multifd_recv_sync_main();
3299 break;
3300 default:
3301 error_report("Unknown combination of migration flags: %#x"
3302 " (postcopy mode)", flags);
3303 ret = -EINVAL;
3304 break;
3305 }
3306
3307 /* Got the whole host page, wait for decompress before placing. */
3308 if (place_needed) {
3309 ret |= wait_for_decompress_done();
3310 }
3311
3312 /* Detect for any possible file errors */
3313 if (!ret && qemu_file_get_error(f)) {
3314 ret = qemu_file_get_error(f);
3315 }
3316
3317 if (!ret && place_needed) {
3318 /* This gets called at the last target page in the host page */
3319 void *place_dest = (void *)QEMU_ALIGN_DOWN((uintptr_t)host,
3320 block->page_size);
3321
3322 if (all_zero) {
3323 ret = postcopy_place_page_zero(mis, place_dest,
3324 block);
3325 } else {
3326 ret = postcopy_place_page(mis, place_dest,
3327 place_source, block);
3328 }
3329 place_needed = false;
3330 target_pages = 0;
3331 /* Assume we have a zero page until we detect something different */
3332 all_zero = true;
3333 }
3334 }
3335
3336 return ret;
3337 }
3338
3339 static bool postcopy_is_advised(void)
3340 {
3341 PostcopyState ps = postcopy_state_get();
3342 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3343 }
3344
3345 static bool postcopy_is_running(void)
3346 {
3347 PostcopyState ps = postcopy_state_get();
3348 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3349 }
3350
3351 /*
3352 * Flush content of RAM cache into SVM's memory.
3353 * Only flush the pages that be dirtied by PVM or SVM or both.
3354 */
3355 void colo_flush_ram_cache(void)
3356 {
3357 RAMBlock *block = NULL;
3358 void *dst_host;
3359 void *src_host;
3360 unsigned long offset = 0;
3361
3362 memory_global_dirty_log_sync();
3363 WITH_RCU_READ_LOCK_GUARD() {
3364 RAMBLOCK_FOREACH_NOT_IGNORED(block) {
3365 ramblock_sync_dirty_bitmap(ram_state, block);
3366 }
3367 }
3368
3369 trace_colo_flush_ram_cache_begin(ram_state->migration_dirty_pages);
3370 WITH_RCU_READ_LOCK_GUARD() {
3371 block = QLIST_FIRST_RCU(&ram_list.blocks);
3372
3373 while (block) {
3374 offset = migration_bitmap_find_dirty(ram_state, block, offset);
3375
3376 if (((ram_addr_t)offset) << TARGET_PAGE_BITS
3377 >= block->used_length) {
3378 offset = 0;
3379 block = QLIST_NEXT_RCU(block, next);
3380 } else {
3381 migration_bitmap_clear_dirty(ram_state, block, offset);
3382 dst_host = block->host
3383 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3384 src_host = block->colo_cache
3385 + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
3386 memcpy(dst_host, src_host, TARGET_PAGE_SIZE);
3387 }
3388 }
3389 }
3390 trace_colo_flush_ram_cache_end();
3391 }
3392
3393 /**
3394 * ram_load_precopy: load pages in precopy case
3395 *
3396 * Returns 0 for success or -errno in case of error
3397 *
3398 * Called in precopy mode by ram_load().
3399 * rcu_read_lock is taken prior to this being called.
3400 *
3401  * @f: QEMUFile to read the data from
3402 */
3403 static int ram_load_precopy(QEMUFile *f)
3404 {
3405 int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
3406 /* ADVISE is earlier, it shows the source has the postcopy capability on */
3407 bool postcopy_advised = postcopy_is_advised();
3408 if (!migrate_use_compression()) {
3409 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3410 }
3411
3412 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3413 ram_addr_t addr, total_ram_bytes;
3414 void *host = NULL, *host_bak = NULL;
3415 uint8_t ch;
3416
3417 /*
3418 * Yield periodically to let main loop run, but an iteration of
3419 * the main loop is expensive, so do it each some iterations
3420 */
3421 if ((i & 32767) == 0 && qemu_in_coroutine()) {
3422 aio_co_schedule(qemu_get_current_aio_context(),
3423 qemu_coroutine_self());
3424 qemu_coroutine_yield();
3425 }
3426 i++;
3427
3428 addr = qemu_get_be64(f);
3429 flags = addr & ~TARGET_PAGE_MASK;
3430 addr &= TARGET_PAGE_MASK;
3431
3432 if (flags & invalid_flags) {
3433 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3434 error_report("Received an unexpected compressed page");
3435 }
3436
3437 ret = -EINVAL;
3438 break;
3439 }
3440
3441 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3442 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
3443 RAMBlock *block = ram_block_from_stream(f, flags);
3444
3445 host = host_from_ram_block_offset(block, addr);
3446 /*
3447 * After going into COLO stage, we should not load the page
3448 * into SVM's memory directly, we put them into colo_cache firstly.
3449 * NOTE: We need to keep a copy of SVM's ram in colo_cache.
3450 * Previously, we copied all these memory in preparing stage of COLO
3451 * while we need to stop VM, which is a time-consuming process.
3452 * Here we optimize it by a trick, back-up every page while in
3453 * migration process while COLO is enabled, though it affects the
3454 * speed of the migration, but it obviously reduce the downtime of
3455 * back-up all SVM'S memory in COLO preparing stage.
3456 */
3457 if (migration_incoming_colo_enabled()) {
3458 if (migration_incoming_in_colo_state()) {
3459 /* In COLO stage, put all pages into cache temporarily */
3460 host = colo_cache_from_block_offset(block, addr, true);
3461 } else {
3462 /*
3463 * In migration stage but before COLO stage,
3464 * Put all pages into both cache and SVM's memory.
3465 */
3466 host_bak = colo_cache_from_block_offset(block, addr, false);
3467 }
3468 }
3469 if (!host) {
3470 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3471 ret = -EINVAL;
3472 break;
3473 }
3474 if (!migration_incoming_in_colo_state()) {
3475 ramblock_recv_bitmap_set(block, host);
3476 }
3477
3478 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
3479 }
3480
3481 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3482 case RAM_SAVE_FLAG_MEM_SIZE:
3483 /* Synchronize RAM block list */
3484 total_ram_bytes = addr;
3485 while (!ret && total_ram_bytes) {
3486 RAMBlock *block;
3487 char id[256];
3488 ram_addr_t length;
3489
3490 len = qemu_get_byte(f);
3491 qemu_get_buffer(f, (uint8_t *)id, len);
3492 id[len] = 0;
3493 length = qemu_get_be64(f);
3494
3495 block = qemu_ram_block_by_name(id);
3496 if (block && !qemu_ram_is_migratable(block)) {
3497 error_report("block %s should not be migrated !", id);
3498 ret = -EINVAL;
3499 } else if (block) {
3500 if (length != block->used_length) {
3501 Error *local_err = NULL;
3502
3503 ret = qemu_ram_resize(block, length,
3504 &local_err);
3505 if (local_err) {
3506 error_report_err(local_err);
3507 }
3508 }
3509 /* For postcopy we need to check hugepage sizes match */
3510 if (postcopy_advised &&
3511 block->page_size != qemu_host_page_size) {
3512 uint64_t remote_page_size = qemu_get_be64(f);
3513 if (remote_page_size != block->page_size) {
3514 error_report("Mismatched RAM page size %s "
3515 "(local) %zd != %" PRId64,
3516 id, block->page_size,
3517 remote_page_size);
3518 ret = -EINVAL;
3519 }
3520 }
3521 if (migrate_ignore_shared()) {
3522 hwaddr addr = qemu_get_be64(f);
3523 if (ramblock_is_ignored(block) &&
3524 block->mr->addr != addr) {
3525 error_report("Mismatched GPAs for block %s "
3526 "%" PRId64 "!= %" PRId64,
3527 id, (uint64_t)addr,
3528 (uint64_t)block->mr->addr);
3529 ret = -EINVAL;
3530 }
3531 }
3532 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3533 block->idstr);
3534 } else {
3535 error_report("Unknown ramblock \"%s\", cannot "
3536 "accept migration", id);
3537 ret = -EINVAL;
3538 }
3539
3540 total_ram_bytes -= length;
3541 }
3542 break;
3543
3544 case RAM_SAVE_FLAG_ZERO:
3545 ch = qemu_get_byte(f);
3546 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3547 break;
3548
3549 case RAM_SAVE_FLAG_PAGE:
3550 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3551 break;
3552
3553 case RAM_SAVE_FLAG_COMPRESS_PAGE:
3554 len = qemu_get_be32(f);
3555 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3556 error_report("Invalid compressed data length: %d", len);
3557 ret = -EINVAL;
3558 break;
3559 }
3560 decompress_data_with_multi_threads(f, host, len);
3561 break;
3562
3563 case RAM_SAVE_FLAG_XBZRLE:
3564 if (load_xbzrle(f, addr, host) < 0) {