tests/acpi: add microvm test
[qemu.git] / hw / virtio / vhost-user.c
1 /*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-user.h"
15 #include "hw/virtio/vhost-backend.h"
16 #include "hw/virtio/virtio.h"
17 #include "hw/virtio/virtio-net.h"
18 #include "chardev/char-fe.h"
19 #include "sysemu/kvm.h"
20 #include "qemu/error-report.h"
21 #include "qemu/main-loop.h"
22 #include "qemu/sockets.h"
23 #include "sysemu/cryptodev.h"
24 #include "migration/migration.h"
25 #include "migration/postcopy-ram.h"
26 #include "trace.h"
27
28 #include <sys/ioctl.h>
29 #include <sys/socket.h>
30 #include <sys/un.h>
31
32 #include "standard-headers/linux/vhost_types.h"
33
34 #ifdef CONFIG_LINUX
35 #include <linux/userfaultfd.h>
36 #endif
37
38 #define VHOST_MEMORY_BASELINE_NREGIONS 8
39 #define VHOST_USER_F_PROTOCOL_FEATURES 30
40 #define VHOST_USER_SLAVE_MAX_FDS 8
41
42 /*
43 * Set maximum number of RAM slots supported to
44 * the maximum number supported by the target
 * hardware platform.
46 */
47 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \
48 defined(TARGET_ARM) || defined(TARGET_ARM_64)
49 #include "hw/acpi/acpi.h"
50 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS
51
52 #elif defined(TARGET_PPC) || defined(TARGET_PPC_64)
53 #include "hw/ppc/spapr.h"
54 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
55
56 #else
57 #define VHOST_USER_MAX_RAM_SLOTS 512
58 #endif
59
60 /*
61 * Maximum size of virtio device config space
62 */
63 #define VHOST_USER_MAX_CONFIG_SIZE 256
64
/*
 * Optional protocol features negotiated with the backend via
 * VHOST_USER_GET/SET_PROTOCOL_FEATURES.  Bit numbers are part of the
 * vhost-user ABI and must never be renumbered.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask with one bit set for every protocol feature QEMU understands. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
86
/*
 * Request codes sent from QEMU (master) to the vhost-user backend.
 * Values are fixed by the vhost-user protocol specification.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_MAX
} VhostUserRequest;
129
/* Request codes sent from the backend (slave) to QEMU over the slave channel. */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
137
/* One guest memory region in the on-wire format. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* guest physical address (GPA) */
    uint64_t memory_size;       /* region length in bytes */
    uint64_t userspace_addr;    /* QEMU virtual address (HVA) */
    uint64_t mmap_offset;       /* offset into the fd the backend must mmap */
} VhostUserMemoryRegion;

/* Payload of VHOST_USER_SET_MEM_TABLE: a bounded table of regions. */
typedef struct VhostUserMemory {
    uint32_t nregions;          /* number of valid entries in regions[] */
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
} VhostUserMemory;

/* Payload of VHOST_USER_ADD_MEM_REG / VHOST_USER_REM_MEM_REG. */
typedef struct VhostUserMemRegMsg {
    uint32_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;
155
/* Payload of VHOST_USER_SET_LOG_BASE: describes the dirty-log mmap area. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

/* Payload of VHOST_USER_GET/SET_CONFIG: a window into the device config space. */
typedef struct VhostUserConfig {
    uint32_t offset;    /* offset into the config space */
    uint32_t size;      /* number of bytes in region[] */
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;
167
#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64

/* Payload of VHOST_USER_CREATE_CRYPTO_SESSION and its reply. */
typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

/* Dummy instance used only so sizeof() can compute the config header size. */
static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                   + sizeof(c.size) \
                                   + sizeof(c.flags))
183
/* Payload of VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG. */
typedef struct VhostUserVringArea {
    uint64_t u64;       /* vring index plus flag bits */
    uint64_t size;      /* notifier area size (0 means unmap) */
    uint64_t offset;    /* offset into the passed fd */
} VhostUserVringArea;

/* Payload of VHOST_USER_GET/SET_INFLIGHT_FD. */
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
196
/* Fixed-size on-wire message header; packed so layout matches the protocol. */
typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

/* Union of every possible message payload; hdr.size says how much is valid. */
typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
        VhostUserMemRegMsg mem_reg;
        VhostUserLog log;
        struct vhost_iotlb_msg iotlb;
        VhostUserConfig config;
        VhostUserCryptoSession session;
        VhostUserVringArea area;
        VhostUserInflight inflight;
} VhostUserPayload;

/* A complete message: header immediately followed by the payload. */
typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

/* Dummy instance used only so sizeof() can compute the sizes below. */
static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)
235
/* Per-vhost-device private state, stored in vhost_dev->opaque. */
struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    /* Socket for backend-initiated (slave channel) requests; -1 if unset. */
    int slave_fd;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD  postcopy_fd;
    /* Backend-side mapping addresses reported during postcopy setup. */
    uint64_t           postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t             region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock         **region_rb;
    /* The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t        *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool               postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
};
260
/* Work item produced by scrub_shadow_regions(): a region to add or remove. */
struct scrub_regions {
    struct vhost_memory_region *region; /* the region itself */
    int reg_idx;                        /* index in shadow or device table */
    int fd_idx;                         /* index of its fd among valid fds */
};
266
267 static bool ioeventfd_enabled(void)
268 {
269 return !kvm_enabled() || kvm_eventfds_enabled();
270 }
271
/*
 * Read a fixed-size vhost-user message header from the backend channel
 * into @msg->hdr and validate its flags.
 *
 * Returns 0 on success, -1 on a short read or when the flags are not
 * exactly the REPLY bit plus the supported protocol version.
 */
static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return -1;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -1;
    }

    return 0;
}
296
/*
 * Read a complete vhost-user message (header plus payload) from the
 * backend channel into @msg.
 *
 * The advertised payload size is bounded by VHOST_USER_PAYLOAD_SIZE so
 * an untrusted backend cannot overflow the caller's VhostUserMsg.
 *
 * Returns 0 on success, -1 on read failure or a malformed header.
 */
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    if (vhost_user_read_header(dev, msg) < 0) {
        return -1;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                " Size %d exceeds the maximum %zu.", msg->hdr.size,
                VHOST_USER_PAYLOAD_SIZE);
        return -1;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            return -1;
        }
    }

    return 0;
}
329
/*
 * Wait for and validate the backend's acknowledgement of @msg, but only
 * when @msg was sent with the NEED_REPLY flag set.
 *
 * The reply's u64 payload is the status: 0 for success, non-zero error.
 * Returns 0 on success (or when no reply was requested), -1 otherwise.
 */
static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    if (vhost_user_read(dev, &msg_reply) < 0) {
        return -1;
    }

    /* The ack must echo the request code we sent. */
    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type."
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -1;
    }

    return msg_reply.payload.u64 ? -1 : 0;
}
352
353 static bool vhost_user_one_time_request(VhostUserRequest request)
354 {
355 switch (request) {
356 case VHOST_USER_SET_OWNER:
357 case VHOST_USER_RESET_OWNER:
358 case VHOST_USER_SET_MEM_TABLE:
359 case VHOST_USER_GET_QUEUE_NUM:
360 case VHOST_USER_NET_SET_MTU:
361 return true;
362 default:
363 return false;
364 }
365 }
366
367 /* most non-init callers ignore the error */
/*
 * Send @msg (header plus hdr.size bytes of payload) to the backend,
 * attaching @fd_num ancillary file descriptors from @fds.
 *
 * Returns 0 on success, -1 on failure.
 */
/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we only need to send it the first time.  Later instances of such
     * a request are silently dropped (and the NEED_REPLY flag cleared
     * so the caller does not wait for an ack that will never come).
     */
    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -1;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return -1;
    }

    return 0;
}
399
/*
 * Hand the backend a socket for GPU-specific communication by sending
 * VHOST_USER_GPU_SET_SOCKET with @fd as ancillary data.
 *
 * Returns 0 on success, -1 on failure.
 */
int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}
409
/*
 * Tell the backend where the dirty-page log lives.
 *
 * When the LOG_SHMFD protocol feature was negotiated the log is shared
 * as an fd and the backend acknowledges once it has mapped it; we wait
 * for that ack so logging is guaranteed active on return.
 *
 * Returns 0 on success, -1 on failure.
 */
static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_USER_MAX_RAM_SLOTS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        if (vhost_user_read(dev, &msg) < 0) {
            return -1;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -1;
        }
    }

    return 0;
}
449
/*
 * Map a host virtual address back to its MemoryRegion.
 *
 * On return *@offset is the offset of @addr within the region's RAM
 * block and *@fd is the region's backing file descriptor (-1 if the
 * region is not fd-backed).
 */
static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
                                            int *fd)
{
    MemoryRegion *mr;

    /* The HVA must fit in a pointer on this host. */
    assert((uintptr_t)addr == addr);
    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
    *fd = memory_region_get_fd(mr);

    return mr;
}
461
462 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
463 struct vhost_memory_region *src,
464 uint64_t mmap_offset)
465 {
466 assert(src != NULL && dst != NULL);
467 dst->userspace_addr = src->userspace_addr;
468 dst->memory_size = src->memory_size;
469 dst->guest_phys_addr = src->guest_phys_addr;
470 dst->mmap_offset = mmap_offset;
471 }
472
/*
 * Build a VHOST_USER_SET_MEM_TABLE message from the device's current
 * memory map.  Only fd-backed regions are included; their fds are
 * collected into @fds / @fd_num for ancillary transmission.
 *
 * With @track_ramblocks set (postcopy), the per-region RAMBlock and
 * offset bookkeeping in @u is refreshed as a side effect.
 *
 * Returns 1 on success, -1 when there are too many regions or none at
 * all (i.e. no shared memory backend is in use).
 */
static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -1;
            }
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            /* Region not fd-backed: clear any stale tracking state. */
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -1;
    }

    /* Size covers nregions + padding plus only the populated entries. */
    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 1;
}
528
529 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
530 struct vhost_memory_region *vdev_reg)
531 {
532 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
533 shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
534 shadow_reg->memory_size == vdev_reg->memory_size;
535 }
536
537 static void scrub_shadow_regions(struct vhost_dev *dev,
538 struct scrub_regions *add_reg,
539 int *nr_add_reg,
540 struct scrub_regions *rem_reg,
541 int *nr_rem_reg, uint64_t *shadow_pcb,
542 bool track_ramblocks)
543 {
544 struct vhost_user *u = dev->opaque;
545 bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
546 struct vhost_memory_region *reg, *shadow_reg;
547 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
548 ram_addr_t offset;
549 MemoryRegion *mr;
550 bool matching;
551
552 /*
553 * Find memory regions present in our shadow state which are not in
554 * the device's current memory state.
555 *
556 * Mark regions in both the shadow and device state as "found".
557 */
558 for (i = 0; i < u->num_shadow_regions; i++) {
559 shadow_reg = &u->shadow_regions[i];
560 matching = false;
561
562 for (j = 0; j < dev->mem->nregions; j++) {
563 reg = &dev->mem->regions[j];
564
565 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
566
567 if (reg_equal(shadow_reg, reg)) {
568 matching = true;
569 found[j] = true;
570 if (track_ramblocks) {
571 /*
572 * Reset postcopy client bases, region_rb, and
573 * region_rb_offset in case regions are removed.
574 */
575 if (fd > 0) {
576 u->region_rb_offset[j] = offset;
577 u->region_rb[j] = mr->ram_block;
578 shadow_pcb[j] = u->postcopy_client_bases[i];
579 } else {
580 u->region_rb_offset[j] = 0;
581 u->region_rb[j] = NULL;
582 }
583 }
584 break;
585 }
586 }
587
588 /*
589 * If the region was not found in the current device memory state
590 * create an entry for it in the removed list.
591 */
592 if (!matching) {
593 rem_reg[rm_idx].region = shadow_reg;
594 rem_reg[rm_idx++].reg_idx = i;
595 }
596 }
597
598 /*
599 * For regions not marked "found", create entries in the added list.
600 *
601 * Note their indexes in the device memory state and the indexes of their
602 * file descriptors.
603 */
604 for (i = 0; i < dev->mem->nregions; i++) {
605 reg = &dev->mem->regions[i];
606 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
607 if (fd > 0) {
608 ++fd_num;
609 }
610
611 /*
612 * If the region was in both the shadow and device state we don't
613 * need to send a VHOST_USER_ADD_MEM_REG message for it.
614 */
615 if (found[i]) {
616 continue;
617 }
618
619 add_reg[add_idx].region = reg;
620 add_reg[add_idx].reg_idx = i;
621 add_reg[add_idx++].fd_idx = fd_num;
622 }
623 *nr_rem_reg = rm_idx;
624 *nr_add_reg = add_idx;
625
626 return;
627 }
628
/*
 * Send a VHOST_USER_REM_MEM_REG for each entry in @remove_reg and drop
 * the region from the shadow table once the backend has unmapped it.
 *
 * Iterates backwards because remove_reg entries are in shadow-table
 * order, which minimizes the memmove work when compacting the table.
 *
 * Returns 0 on success, negative on failure.
 */
static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
            msg->payload.mem_reg.region = region_buffer;

            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
                return -1;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is now
         * safe to remove it from the shadow table.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}
681
/*
 * Send a VHOST_USER_ADD_MEM_REG for each entry in @add_reg and append
 * the region to the shadow table.
 *
 * With @track_ramblocks set (postcopy), the backend replies with its
 * own mapping address in the userspace_addr field; that address is
 * stored into @shadow_pcb so the postcopy code can resolve faults.
 * Otherwise an ordinary REPLY_ACK is consumed if @reply_supported.
 *
 * Returns 0 on success, negative on failure.
 */
static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg, offset);
            msg->payload.mem_reg.region = region_buffer;

            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
                return -1;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                if (vhost_user_read(dev, &msg_reply) < 0) {
                    return -1;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type."
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -1;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -1;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", expected "
                                 "%" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -1;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            /* Region not fd-backed: clear any stale tracking state. */
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}
791
/*
 * Incrementally update the backend's memory map using ADD/REM_MEM_REG
 * messages (CONFIGURE_MEM_SLOTS protocol feature) instead of resending
 * the whole table.
 *
 * In postcopy mode (@track_ramblocks) the collected postcopy client
 * bases are committed to @u and a final ack message is sent to the
 * backend once all regions are registered.
 *
 * Returns 0 on success, -1 on failure.
 */
static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
    struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
    uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
    int nr_add_reg, nr_rem_reg;

    /* Every ADD/REM message carries exactly one region. */
    msg->hdr.size = sizeof(msg->payload.mem_reg.padding) +
        sizeof(VhostUserMemoryRegion);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                reply_supported) < 0)
    {
        goto err;
    }

    if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg,
                shadow_pcb, reply_supported, track_ramblocks) < 0)
    {
        goto err;
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        if (vhost_user_write(dev, msg, NULL, 0) < 0) {
            return -1;
        }
    }

    return 0;

err:
    /* Commit whatever bases were gathered before the failure. */
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
    }

    return -1;
}
849
/*
 * Postcopy-aware variant of vhost_user_set_mem_table().
 *
 * Besides sending the memory table, it grows the per-region RAMBlock
 * tracking arrays, collects the backend's mapping addresses from the
 * reply into postcopy_client_bases, and finally acks the backend so it
 * may start generating page faults.
 *
 * Returns 0 on success, -1 on failure.
 */
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    /* Grow the tracking arrays, zero-filling the newly added tail. */
    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
                                          true) < 0) {
            return -1;
        }
    } else {
        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                              true) < 0) {
            return -1;
        }

        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
            return -1;
        }

        if (vhost_user_read(dev, &msg_reply) < 0) {
            return -1;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type."
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -1;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -1;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);

        /*
         * They're in the same order as the regions that were sent
         * but some of the regions were skipped (above) if they
         * didn't have fd's
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -1;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
            return -1;
        }
    }

    return 0;
}
958
/*
 * Push the device's memory table to the backend.
 *
 * Dispatches to the postcopy variant when postcopy-listen is active,
 * and uses incremental ADD/REM messages when the backend negotiated
 * CONFIGURE_MEM_SLOTS; otherwise sends one SET_MEM_TABLE message.
 *
 * Returns 0 on success, -1 on failure.
 */
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_BASELINE_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
                                          false) < 0) {
            return -1;
        }
    } else {
        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                              false) < 0) {
            return -1;
        }
        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
            return -1;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}
1010
1011 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1012 struct vhost_vring_addr *addr)
1013 {
1014 VhostUserMsg msg = {
1015 .hdr.request = VHOST_USER_SET_VRING_ADDR,
1016 .hdr.flags = VHOST_USER_VERSION,
1017 .payload.addr = *addr,
1018 .hdr.size = sizeof(msg.payload.addr),
1019 };
1020
1021 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1022 return -1;
1023 }
1024
1025 return 0;
1026 }
1027
1028 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1029 struct vhost_vring_state *ring)
1030 {
1031 bool cross_endian = virtio_has_feature(dev->protocol_features,
1032 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1033 VhostUserMsg msg = {
1034 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1035 .hdr.flags = VHOST_USER_VERSION,
1036 .payload.state = *ring,
1037 .hdr.size = sizeof(msg.payload.state),
1038 };
1039
1040 if (!cross_endian) {
1041 error_report("vhost-user trying to send unhandled ioctl");
1042 return -1;
1043 }
1044
1045 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1046 return -1;
1047 }
1048
1049 return 0;
1050 }
1051
/*
 * Common helper: send a vring-state request (@request) carrying @ring.
 * Returns 0 on success, -1 on failure.
 */
static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}
1069
/* Set the size (number of descriptors) of one vring. */
static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}
1075
/*
 * Re-attach the MMIO host notifier for @queue_idx if the backend
 * provided one (n->addr set) and it is not currently mapped.
 */
static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
                                             int queue_idx)
{
    struct vhost_user *u = dev->opaque;
    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
    VirtIODevice *vdev = dev->vdev;

    if (n->addr && !n->set) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
        n->set = true;
    }
}
1088
/*
 * Detach the MMIO host notifier for @queue_idx if one is currently
 * mapped; inverse of vhost_user_host_notifier_restore().
 */
static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
                                            int queue_idx)
{
    struct vhost_user *u = dev->opaque;
    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
    VirtIODevice *vdev = dev->vdev;

    if (n->addr && n->set) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
        n->set = false;
    }
}
1101
/*
 * Set the ring's starting index, re-attaching the host notifier first
 * (it was removed by the matching GET_VRING_BASE on stop).
 */
static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    vhost_user_host_notifier_restore(dev, ring->index);

    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}
1109
1110 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1111 {
1112 int i;
1113
1114 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1115 return -1;
1116 }
1117
1118 for (i = 0; i < dev->nvqs; ++i) {
1119 struct vhost_vring_state state = {
1120 .index = dev->vq_index + i,
1121 .num = enable,
1122 };
1123
1124 vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1125 }
1126
1127 return 0;
1128 }
1129
/*
 * Query the current state of a vring via VHOST_USER_GET_VRING_BASE.
 * This is a request/reply exchange: the result is written back into
 * *ring.  Returns 0 on success, -1 on any transport or protocol error.
 */
static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    /*
     * Detach the queue's host-notifier MR; this request is part of
     * stopping the ring.
     */
    vhost_user_host_notifier_remove(dev, ring->index);

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    /* The reply must echo the request type and carry a full state payload. */
    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *ring = msg.payload.state;

    return 0;
}
1165
1166 static int vhost_set_vring_file(struct vhost_dev *dev,
1167 VhostUserRequest request,
1168 struct vhost_vring_file *file)
1169 {
1170 int fds[VHOST_USER_MAX_RAM_SLOTS];
1171 size_t fd_num = 0;
1172 VhostUserMsg msg = {
1173 .hdr.request = request,
1174 .hdr.flags = VHOST_USER_VERSION,
1175 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1176 .hdr.size = sizeof(msg.payload.u64),
1177 };
1178
1179 if (ioeventfd_enabled() && file->fd > 0) {
1180 fds[fd_num++] = file->fd;
1181 } else {
1182 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1183 }
1184
1185 if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
1186 return -1;
1187 }
1188
1189 return 0;
1190 }
1191
/* Pass the guest-to-backend kick eventfd via VHOST_USER_SET_VRING_KICK. */
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}
1197
/* Pass the backend-to-guest call eventfd via VHOST_USER_SET_VRING_CALL. */
static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}
1203
1204 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
1205 {
1206 VhostUserMsg msg = {
1207 .hdr.request = request,
1208 .hdr.flags = VHOST_USER_VERSION,
1209 .payload.u64 = u64,
1210 .hdr.size = sizeof(msg.payload.u64),
1211 };
1212
1213 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1214 return -1;
1215 }
1216
1217 return 0;
1218 }
1219
/* Set the negotiated virtio feature bits via VHOST_USER_SET_FEATURES. */
static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
}
1225
/*
 * Set the negotiated vhost-user protocol feature bits via
 * VHOST_USER_SET_PROTOCOL_FEATURES.
 */
static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
}
1231
/*
 * Generic GET-style request: send 'request' with an empty payload and
 * read a u64 reply back into *u64.  Returns 0 on success, -1 on any
 * transport or protocol error.
 */
static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    /*
     * One-time requests are only sent for the device owning vq_index 0;
     * other devices report success without touching the wire (and leave
     * *u64 untouched).
     */
    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    /* The backend must echo the request type back in its reply. */
    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *u64 = msg.payload.u64;

    return 0;
}
1266
/* Query the backend's virtio feature bits via VHOST_USER_GET_FEATURES. */
static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
}
1271
1272 static int vhost_user_set_owner(struct vhost_dev *dev)
1273 {
1274 VhostUserMsg msg = {
1275 .hdr.request = VHOST_USER_SET_OWNER,
1276 .hdr.flags = VHOST_USER_VERSION,
1277 };
1278
1279 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1280 return -1;
1281 }
1282
1283 return 0;
1284 }
1285
1286 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1287 uint64_t *max_memslots)
1288 {
1289 uint64_t backend_max_memslots;
1290 int err;
1291
1292 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1293 &backend_max_memslots);
1294 if (err < 0) {
1295 return err;
1296 }
1297
1298 *max_memslots = backend_max_memslots;
1299
1300 return 0;
1301 }
1302
1303 static int vhost_user_reset_device(struct vhost_dev *dev)
1304 {
1305 VhostUserMsg msg = {
1306 .hdr.flags = VHOST_USER_VERSION,
1307 };
1308
1309 msg.hdr.request = virtio_has_feature(dev->protocol_features,
1310 VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1311 ? VHOST_USER_RESET_DEVICE
1312 : VHOST_USER_RESET_OWNER;
1313
1314 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1315 return -1;
1316 }
1317
1318 return 0;
1319 }
1320
1321 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1322 {
1323 int ret = -1;
1324
1325 if (!dev->config_ops) {
1326 return -1;
1327 }
1328
1329 if (dev->config_ops->vhost_dev_config_notifier) {
1330 ret = dev->config_ops->vhost_dev_config_notifier(dev);
1331 }
1332
1333 return ret;
1334 }
1335
/*
 * Handle VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: map one page of the
 * fd passed by the backend and expose it to the guest as the queue's
 * host-notifier memory region, so guest kicks go straight to the
 * backend.  If the NOFD flag is set, only the old mapping (if any) is
 * torn down.  Returns 0 on success, -1 on any validation/mmap failure.
 */
static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
                                                       VhostUserVringArea *area,
                                                       int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size;
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    /* Feature must be negotiated and the queue index must be valid. */
    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -1;
    }

    n = &user->notifier[queue_idx];

    /* Tear down any previous mapping before installing a new one. */
    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }

    /* NOFD means "remove only"; we are done. */
    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -1;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -1;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        munmap(addr, page_size);
        return -1;
    }

    n->addr = addr;
    n->set = true;

    return 0;
}
1395
1396 static void slave_read(void *opaque)
1397 {
1398 struct vhost_dev *dev = opaque;
1399 struct vhost_user *u = dev->opaque;
1400 VhostUserHeader hdr = { 0, };
1401 VhostUserPayload payload = { 0, };
1402 int size, ret = 0;
1403 struct iovec iov;
1404 struct msghdr msgh;
1405 int fd[VHOST_USER_SLAVE_MAX_FDS];
1406 char control[CMSG_SPACE(sizeof(fd))];
1407 struct cmsghdr *cmsg;
1408 int i, fdsize = 0;
1409
1410 memset(&msgh, 0, sizeof(msgh));
1411 msgh.msg_iov = &iov;
1412 msgh.msg_iovlen = 1;
1413 msgh.msg_control = control;
1414 msgh.msg_controllen = sizeof(control);
1415
1416 memset(fd, -1, sizeof(fd));
1417
1418 /* Read header */
1419 iov.iov_base = &hdr;
1420 iov.iov_len = VHOST_USER_HDR_SIZE;
1421
1422 do {
1423 size = recvmsg(u->slave_fd, &msgh, 0);
1424 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1425
1426 if (size != VHOST_USER_HDR_SIZE) {
1427 error_report("Failed to read from slave.");
1428 goto err;
1429 }
1430
1431 if (msgh.msg_flags & MSG_CTRUNC) {
1432 error_report("Truncated message.");
1433 goto err;
1434 }
1435
1436 for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
1437 cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
1438 if (cmsg->cmsg_level == SOL_SOCKET &&
1439 cmsg->cmsg_type == SCM_RIGHTS) {
1440 fdsize = cmsg->cmsg_len - CMSG_LEN(0);
1441 memcpy(fd, CMSG_DATA(cmsg), fdsize);
1442 break;
1443 }
1444 }
1445
1446 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1447 error_report("Failed to read msg header."
1448 " Size %d exceeds the maximum %zu.", hdr.size,
1449 VHOST_USER_PAYLOAD_SIZE);
1450 goto err;
1451 }
1452
1453 /* Read payload */
1454 do {
1455 size = read(u->slave_fd, &payload, hdr.size);
1456 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1457
1458 if (size != hdr.size) {
1459 error_report("Failed to read payload from slave.");
1460 goto err;
1461 }
1462
1463 switch (hdr.request) {
1464 case VHOST_USER_SLAVE_IOTLB_MSG:
1465 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1466 break;
1467 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
1468 ret = vhost_user_slave_handle_config_change(dev);
1469 break;
1470 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1471 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1472 fd[0]);
1473 break;
1474 default:
1475 error_report("Received unexpected msg type: %d.", hdr.request);
1476 ret = -EINVAL;
1477 }
1478
1479 /* Close the remaining file descriptors. */
1480 for (i = 0; i < fdsize; i++) {
1481 if (fd[i] != -1) {
1482 close(fd[i]);
1483 }
1484 }
1485
1486 /*
1487 * REPLY_ACK feature handling. Other reply types has to be managed
1488 * directly in their request handlers.
1489 */
1490 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1491 struct iovec iovec[2];
1492
1493
1494 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1495 hdr.flags |= VHOST_USER_REPLY_MASK;
1496
1497 payload.u64 = !!ret;
1498 hdr.size = sizeof(payload.u64);
1499
1500 iovec[0].iov_base = &hdr;
1501 iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1502 iovec[1].iov_base = &payload;
1503 iovec[1].iov_len = hdr.size;
1504
1505 do {
1506 size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
1507 } while (size < 0 && (errno == EINTR || errno == EAGAIN));
1508
1509 if (size != VHOST_USER_HDR_SIZE + hdr.size) {
1510 error_report("Failed to send msg reply to slave.");
1511 goto err;
1512 }
1513 }
1514
1515 return;
1516
1517 err:
1518 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1519 close(u->slave_fd);
1520 u->slave_fd = -1;
1521 for (i = 0; i < fdsize; i++) {
1522 if (fd[i] != -1) {
1523 close(fd[i]);
1524 }
1525 }
1526 return;
1527 }
1528
/*
 * Create the slave (backend-to-QEMU) channel: make a socketpair, keep
 * one end locally (u->slave_fd, serviced by slave_read()) and pass the
 * other end to the backend via VHOST_USER_SET_SLAVE_REQ_FD.  A no-op
 * returning success when the backend did not negotiate
 * VHOST_USER_PROTOCOL_F_SLAVE_REQ.  Returns 0 on success; on failure
 * the local end is closed and u->slave_fd reset to -1.
 */
static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
        return 0;
    }

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        error_report("socketpair() failed");
        return -1;
    }

    u->slave_fd = sv[0];
    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    /* The backend's end is always closed locally once it has been sent. */
    close(sv[1]);
    if (ret) {
        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
        close(u->slave_fd);
        u->slave_fd = -1;
    }

    return ret;
}
1576
1577 #ifdef CONFIG_LINUX
1578 /*
1579 * Called back from the postcopy fault thread when a fault is received on our
1580 * ufd.
1581 * TODO: This is Linux specific
1582 */
/*
 * Called back from the postcopy fault thread when a fault is received
 * on our ufd.  Translates the faulting client-space address into a
 * RAMBlock offset and requests that shared page, or returns -1 if the
 * address falls in no known region.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    /* Only regions for which we recorded a client base can match. */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}
1616
/*
 * Wake any thread blocked on the given RAMBlock offset once its page
 * has arrived.  Performs the reverse translation of the fault handler
 * above: RAMBlock offset -> client address.  Returns 0 when the offset
 * matches none of our regions (nothing to wake).
 */
static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the clients address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }

    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
1645 #endif
1646
1647 /*
1648 * Called at the start of an inbound postcopy on reception of the
1649 * 'advise' command.
1650 */
/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.  Sends VHOST_USER_POSTCOPY_ADVISE, receives the
 * backend's userfaultfd over the chardev, and registers it with the
 * postcopy fault thread.  Returns 0 on success, -1 with *errp set on
 * failure.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                     VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -1;
    }

    /* The reply is expected to be header-only. */
    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -1;
    }
    /* The ufd arrives as ancillary data alongside the reply. */
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -1;
    }
    qemu_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -1;
#endif
}
1702
1703 /*
1704 * Called at the switch to postcopy on reception of the 'listen' command.
1705 */
1706 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1707 {
1708 struct vhost_user *u = dev->opaque;
1709 int ret;
1710 VhostUserMsg msg = {
1711 .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1712 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1713 };
1714 u->postcopy_listen = true;
1715 trace_vhost_user_postcopy_listen();
1716 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
1717 error_setg(errp, "Failed to send postcopy_listen to vhost");
1718 return -1;
1719 }
1720
1721 ret = process_message_reply(dev, &msg);
1722 if (ret) {
1723 error_setg(errp, "Failed to receive reply to postcopy_listen");
1724 return ret;
1725 }
1726
1727 return 0;
1728 }
1729
1730 /*
1731 * Called at the end of postcopy
1732 */
/*
 * Called at the end of postcopy.  Tells the backend via
 * VHOST_USER_POSTCOPY_END (reply required), then unregisters and
 * closes our copy of the backend's userfaultfd.
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return -1;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    /* NULL handler marks the postcopy fd as torn down for cleanup. */
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}
1761
/*
 * Postcopy notifier: dispatches each phase of an inbound postcopy
 * migration to the matching vhost-user handler.  The PROBE phase only
 * verifies the backend negotiated VHOST_USER_PROTOCOL_F_PAGEFAULT;
 * unknown phases are deliberately ignored.
 */
static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                         postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
        /* We ignore notifications we don't know */
        break;
    }

    return 0;
}
1797
/*
 * Backend init for vhost-user: allocate the per-device vhost_user
 * state, negotiate virtio and vhost-user protocol features, validate
 * feature interdependencies (CONFIG, IOMMU, RAM slots), set up the
 * migration blocker when dirty-log over shmfd is unavailable, create
 * the slave channel (first device only) and register the postcopy
 * notifier.  'opaque' is the VhostUserState shared across devices.
 * Returns 0 on success, negative on any negotiation failure.
 */
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
{
    uint64_t features, protocol_features, ram_slots;
    struct vhost_user *u;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = opaque;
    u->slave_fd = -1;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            return err;
        }

        /* Only advertise features this implementation knows about. */
        dev->protocol_features =
            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
            /* Don't acknowledge CONFIG feature if device doesn't support it */
            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
        } else if (!(protocol_features &
                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
                    "but backend does not support it.");
            return -1;
        }

        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            return err;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                return err;
            }
        }

        /* IOMMU support requires the slave channel plus acknowledged replies. */
        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
                 virtio_has_feature(dev->protocol_features,
                    VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_report("IOMMU support requires reply-ack and "
                         "slave-req protocol features.");
            return -1;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                return err;
            }

            /* The backend must never shrink a previously advertised limit. */
            if (ram_slots < u->user->memory_slots) {
                error_report("The backend specified a max ram slots limit "
                             "of %" PRIu64", when the prior validated limit was %d. "
                             "This limit should never decrease.", ram_slots,
                             u->user->memory_slots);
                return -1;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
        }
    }

    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    /* Only one slave channel is created, by the first device. */
    if (dev->vq_index == 0) {
        err = vhost_setup_slave_channel(dev);
        if (err < 0) {
            return err;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}
1905
1906 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
1907 {
1908 struct vhost_user *u;
1909
1910 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1911
1912 u = dev->opaque;
1913 if (u->postcopy_notifier.notify) {
1914 postcopy_remove_notifier(&u->postcopy_notifier);
1915 u->postcopy_notifier.notify = NULL;
1916 }
1917 u->postcopy_listen = false;
1918 if (u->postcopy_fd.handler) {
1919 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1920 close(u->postcopy_fd.fd);
1921 u->postcopy_fd.handler = NULL;
1922 }
1923 if (u->slave_fd >= 0) {
1924 qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
1925 close(u->slave_fd);
1926 u->slave_fd = -1;
1927 }
1928 g_free(u->region_rb);
1929 u->region_rb = NULL;
1930 g_free(u->region_rb_offset);
1931 u->region_rb_offset = NULL;
1932 u->region_rb_len = 0;
1933 g_free(u);
1934 dev->opaque = 0;
1935
1936 return 0;
1937 }
1938
/*
 * vhost-user queue indices are absolute, so the index maps to itself;
 * the assert only checks it belongs to this device's range.
 */
static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}
1945
1946 static int vhost_user_memslots_limit(struct vhost_dev *dev)
1947 {
1948 struct vhost_user *u = dev->opaque;
1949
1950 return u->user->memory_slots;
1951 }
1952
/*
 * True when the backend negotiated LOG_SHMFD, i.e. the dirty log must
 * be shared with it through a memfd rather than kept locally.
 */
static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}
1960
/*
 * Post-migration hook: announce the guest's new location on the
 * network.  Nothing to do when the guest itself announces
 * (VIRTIO_NET_F_GUEST_ANNOUNCE); otherwise ask the backend to send a
 * RARP if it negotiated VHOST_USER_PROTOCOL_F_RARP, else fail (-1).
 */
static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        /* The 6-byte MAC is carried in the low bytes of the u64 payload. */
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -1;
}
1984
1985 static bool vhost_user_can_merge(struct vhost_dev *dev,
1986 uint64_t start1, uint64_t size1,
1987 uint64_t start2, uint64_t size2)
1988 {
1989 ram_addr_t offset;
1990 int mfd, rfd;
1991
1992 (void)vhost_user_get_mr_data(start1, &offset, &mfd);
1993 (void)vhost_user_get_mr_data(start2, &offset, &rfd);
1994
1995 return mfd == rfd;
1996 }
1997
1998 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
1999 {
2000 VhostUserMsg msg;
2001 bool reply_supported = virtio_has_feature(dev->protocol_features,
2002 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2003
2004 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2005 return 0;
2006 }
2007
2008 msg.hdr.request = VHOST_USER_NET_SET_MTU;
2009 msg.payload.u64 = mtu;
2010 msg.hdr.size = sizeof(msg.payload.u64);
2011 msg.hdr.flags = VHOST_USER_VERSION;
2012 if (reply_supported) {
2013 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2014 }
2015
2016 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2017 return -1;
2018 }
2019
2020 /* If reply_ack supported, slave has to ack specified MTU is valid */
2021 if (reply_supported) {
2022 return process_message_reply(dev, &msg);
2023 }
2024
2025 return 0;
2026 }
2027
/*
 * Forward an IOTLB update/invalidate to the backend via
 * VHOST_USER_IOTLB_MSG.  The message always requests a reply; the
 * backend's ack/nack is returned through process_message_reply().
 * Returns -EFAULT when the write itself fails.
 */
static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -EFAULT;
    }

    return process_message_reply(dev, &msg);
}
2044
2045
/*
 * Intentionally empty: IOTLB miss requests arrive over the shared
 * slave channel (see slave_read), so there is no separate callback
 * to install or remove.
 */
static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}
2050
/*
 * Read 'config_len' bytes of the device config space from the backend
 * via VHOST_USER_GET_CONFIG.  Requires the CONFIG protocol feature and
 * config_len <= VHOST_USER_MAX_CONFIG_SIZE.  Returns 0 on success, -1
 * on any transport or protocol error.
 */
static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -1;
    }

    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
        return -1;
    }

    /* Request the whole region starting at offset 0. */
    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_report("Received bad msg size.");
        return -1;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}
2094
2095 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2096 uint32_t offset, uint32_t size, uint32_t flags)
2097 {
2098 uint8_t *p;
2099 bool reply_supported = virtio_has_feature(dev->protocol_features,
2100 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2101
2102 VhostUserMsg msg = {
2103 .hdr.request = VHOST_USER_SET_CONFIG,
2104 .hdr.flags = VHOST_USER_VERSION,
2105 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2106 };
2107
2108 if (!virtio_has_feature(dev->protocol_features,
2109 VHOST_USER_PROTOCOL_F_CONFIG)) {
2110 return -1;
2111 }
2112
2113 if (reply_supported) {
2114 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2115 }
2116
2117 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2118 return -1;
2119 }
2120
2121 msg.payload.config.offset = offset,
2122 msg.payload.config.size = size,
2123 msg.payload.config.flags = flags,
2124 p = msg.payload.config.region;
2125 memcpy(p, data, size);
2126
2127 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2128 return -1;
2129 }
2130
2131 if (reply_supported) {
2132 return process_message_reply(dev, &msg);
2133 }
2134
2135 return 0;
2136 }
2137
/*
 * Create a crypto session on the backend via
 * VHOST_USER_CREATE_CRYPTO_SESSION.  The symmetric-session parameters
 * (and optional cipher/auth keys) are copied into the request; on
 * success the backend-assigned id is stored in *session_id.  Requires
 * the CRYPTO_SESSION protocol feature.  Returns 0 on success, -1 on
 * error.
 */
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSymSessionInfo *sess_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    memcpy(&msg.payload.session.session_setup_data, sess_info,
              sizeof(CryptoDevBackendSymSessionInfo));
    if (sess_info->key_len) {
        memcpy(&msg.payload.session.key, sess_info->cipher_key,
               sess_info->key_len);
    }
    if (sess_info->auth_key_len > 0) {
        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
               sess_info->auth_key_len);
    }
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_report("vhost_user_write() return -1, create session failed");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_report("vhost_user_read() return -1, create session failed");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -1;
    }

    /*
     * NOTE(review): negative ids signal failure here, which implies
     * session_id is a signed field (it is printed with PRId64) — confirm
     * against the VhostUserCryptoSession definition.
     */
    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                              msg.payload.session.session_id);
        return -1;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}
2198
2199 static int
2200 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2201 {
2202 bool crypto_session = virtio_has_feature(dev->protocol_features,
2203 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2204 VhostUserMsg msg = {
2205 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2206 .hdr.flags = VHOST_USER_VERSION,
2207 .hdr.size = sizeof(msg.payload.u64),
2208 };
2209 msg.payload.u64 = session_id;
2210
2211 if (!crypto_session) {
2212 error_report("vhost-user trying to send unhandled ioctl");
2213 return -1;
2214 }
2215
2216 if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
2217 error_report("vhost_user_write() return -1, close session failed");
2218 return -1;
2219 }
2220
2221 return 0;
2222 }
2223
2224 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2225 MemoryRegionSection *section)
2226 {
2227 bool result;
2228
2229 result = memory_region_get_fd(section->mr) >= 0;
2230
2231 return result;
2232 }
2233
/*
 * Obtain the inflight-I/O tracking buffer from the backend via
 * VHOST_USER_GET_INFLIGHT_FD: the backend replies with an mmap size /
 * offset and passes the backing fd as ancillary data; the region is
 * mapped here and described in *inflight.  A no-op returning success
 * when INFLIGHT_SHMFD was not negotiated, or when the backend reports a
 * zero mmap size.  Returns 0 on success, -1 on error.
 */
static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -1;
    }

    /* A zero-sized region means the backend tracks nothing; success. */
    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    /* The backing fd arrives as ancillary data alongside the reply. */
    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -1;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -1;
    }

    /* fd is kept open in inflight->fd for SET_INFLIGHT_FD after reconnect. */
    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}
2302
2303 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2304 struct vhost_inflight *inflight)
2305 {
2306 VhostUserMsg msg = {
2307 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2308 .hdr.flags = VHOST_USER_VERSION,
2309 .payload.inflight.mmap_size = inflight->size,
2310 .payload.inflight.mmap_offset = inflight->offset,
2311 .payload.inflight.num_queues = dev->nvqs,
2312 .payload.inflight.queue_size = inflight->queue_size,
2313 .hdr.size = sizeof(msg.payload.inflight),
2314 };
2315
2316 if (!virtio_has_feature(dev->protocol_features,
2317 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2318 return 0;
2319 }
2320
2321 if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
2322 return -1;
2323 }
2324
2325 return 0;
2326 }
2327
2328 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2329 {
2330 if (user->chr) {
2331 error_setg(errp, "Cannot initialize vhost-user state");
2332 return false;
2333 }
2334 user->chr = chr;
2335 user->memory_slots = 0;
2336 return true;
2337 }
2338
2339 void vhost_user_cleanup(VhostUserState *user)
2340 {
2341 int i;
2342
2343 if (!user->chr) {
2344 return;
2345 }
2346
2347 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2348 if (user->notifier[i].addr) {
2349 object_unparent(OBJECT(&user->notifier[i].mr));
2350 munmap(user->notifier[i].addr, qemu_real_host_page_size);
2351 user->notifier[i].addr = NULL;
2352 }
2353 }
2354 user->chr = NULL;
2355 }
2356
/*
 * Backend ops table for vhost-user: the vhost core dispatches through
 * these callbacks when the backend type is VHOST_BACKEND_TYPE_USER,
 * translating each operation into the corresponding vhost-user message.
 */
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_backend_init,
        .vhost_backend_cleanup = vhost_user_backend_cleanup,
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
        .vhost_migration_done = vhost_user_migration_done,
        .vhost_backend_can_merge = vhost_user_can_merge,
        .vhost_net_set_mtu = vhost_user_net_set_mtu,
        .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
        .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
        .vhost_get_config = vhost_user_get_config,
        .vhost_set_config = vhost_user_set_config,
        .vhost_crypto_create_session = vhost_user_crypto_create_session,
        .vhost_crypto_close_session = vhost_user_crypto_close_session,
        .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
        .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
        .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
};