[qemu.git] / hw/virtio/vhost-user.c
1 /*
2 * vhost-user
3 *
4 * Copyright (c) 2013 Virtual Open Systems Sarl.
5 *
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
8 *
9 */
10
11 #include "qemu/osdep.h"
12 #include "qapi/error.h"
13 #include "hw/virtio/vhost.h"
14 #include "hw/virtio/vhost-user.h"
15 #include "hw/virtio/vhost-backend.h"
16 #include "hw/virtio/virtio.h"
17 #include "hw/virtio/virtio-net.h"
18 #include "chardev/char-fe.h"
19 #include "io/channel-socket.h"
20 #include "sysemu/kvm.h"
21 #include "qemu/error-report.h"
22 #include "qemu/main-loop.h"
23 #include "qemu/sockets.h"
24 #include "sysemu/cryptodev.h"
25 #include "migration/migration.h"
26 #include "migration/postcopy-ram.h"
27 #include "trace.h"
28
29 #include <sys/ioctl.h>
30 #include <sys/socket.h>
31 #include <sys/un.h>
32
33 #include "standard-headers/linux/vhost_types.h"
34
35 #ifdef CONFIG_LINUX
36 #include <linux/userfaultfd.h>
37 #endif
38
39 #define VHOST_MEMORY_BASELINE_NREGIONS 8
40 #define VHOST_USER_F_PROTOCOL_FEATURES 30
41 #define VHOST_USER_SLAVE_MAX_FDS 8
42
43 /*
44 * Set maximum number of RAM slots supported to
45 * the maximum number supported by the target
46 * hardware platform.
47 */
48 #if defined(TARGET_X86) || defined(TARGET_X86_64) || \
49 defined(TARGET_ARM) || defined(TARGET_ARM_64)
50 #include "hw/acpi/acpi.h"
51 #define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS
52
53 #elif defined(TARGET_PPC) || defined(TARGET_PPC_64)
54 #include "hw/ppc/spapr.h"
55 #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS
56
57 #else
58 #define VHOST_USER_MAX_RAM_SLOTS 512
59 #endif
60
61 /*
62 * Maximum size of virtio device config space
63 */
64 #define VHOST_USER_MAX_CONFIG_SIZE 256
65
66 enum VhostUserProtocolFeature {
67 VHOST_USER_PROTOCOL_F_MQ = 0,
68 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
69 VHOST_USER_PROTOCOL_F_RARP = 2,
70 VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
71 VHOST_USER_PROTOCOL_F_NET_MTU = 4,
72 VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
73 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
74 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
75 VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
76 VHOST_USER_PROTOCOL_F_CONFIG = 9,
77 VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
78 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
79 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
80 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
81 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
82 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
83 VHOST_USER_PROTOCOL_F_MAX
84 };
85
86 #define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
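/*
 * Negotiated protocol feature bits are kept in dev->protocol_features
 * and tested with virtio_has_feature(), for example:
 *
 *     if (virtio_has_feature(dev->protocol_features,
 *                            VHOST_USER_PROTOCOL_F_REPLY_ACK)) {
 *         msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
 *     }
 *
 * as done by vhost_user_set_mem_table() and other helpers below.
 */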
87
88 typedef enum VhostUserRequest {
89 VHOST_USER_NONE = 0,
90 VHOST_USER_GET_FEATURES = 1,
91 VHOST_USER_SET_FEATURES = 2,
92 VHOST_USER_SET_OWNER = 3,
93 VHOST_USER_RESET_OWNER = 4,
94 VHOST_USER_SET_MEM_TABLE = 5,
95 VHOST_USER_SET_LOG_BASE = 6,
96 VHOST_USER_SET_LOG_FD = 7,
97 VHOST_USER_SET_VRING_NUM = 8,
98 VHOST_USER_SET_VRING_ADDR = 9,
99 VHOST_USER_SET_VRING_BASE = 10,
100 VHOST_USER_GET_VRING_BASE = 11,
101 VHOST_USER_SET_VRING_KICK = 12,
102 VHOST_USER_SET_VRING_CALL = 13,
103 VHOST_USER_SET_VRING_ERR = 14,
104 VHOST_USER_GET_PROTOCOL_FEATURES = 15,
105 VHOST_USER_SET_PROTOCOL_FEATURES = 16,
106 VHOST_USER_GET_QUEUE_NUM = 17,
107 VHOST_USER_SET_VRING_ENABLE = 18,
108 VHOST_USER_SEND_RARP = 19,
109 VHOST_USER_NET_SET_MTU = 20,
110 VHOST_USER_SET_SLAVE_REQ_FD = 21,
111 VHOST_USER_IOTLB_MSG = 22,
112 VHOST_USER_SET_VRING_ENDIAN = 23,
113 VHOST_USER_GET_CONFIG = 24,
114 VHOST_USER_SET_CONFIG = 25,
115 VHOST_USER_CREATE_CRYPTO_SESSION = 26,
116 VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
117 VHOST_USER_POSTCOPY_ADVISE = 28,
118 VHOST_USER_POSTCOPY_LISTEN = 29,
119 VHOST_USER_POSTCOPY_END = 30,
120 VHOST_USER_GET_INFLIGHT_FD = 31,
121 VHOST_USER_SET_INFLIGHT_FD = 32,
122 VHOST_USER_GPU_SET_SOCKET = 33,
123 VHOST_USER_RESET_DEVICE = 34,
124 /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
125 VHOST_USER_GET_MAX_MEM_SLOTS = 36,
126 VHOST_USER_ADD_MEM_REG = 37,
127 VHOST_USER_REM_MEM_REG = 38,
128 VHOST_USER_MAX
129 } VhostUserRequest;
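/*
 * The numeric values above are part of the wire protocol shared with the
 * backend; they must stay in sync with the vhost-user specification
 * (docs/interop/vhost-user.rst in the QEMU tree) and must never be
 * renumbered.
 */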
130
131 typedef enum VhostUserSlaveRequest {
132 VHOST_USER_SLAVE_NONE = 0,
133 VHOST_USER_SLAVE_IOTLB_MSG = 1,
134 VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
135 VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
136 VHOST_USER_SLAVE_MAX
137 } VhostUserSlaveRequest;
138
139 typedef struct VhostUserMemoryRegion {
140 uint64_t guest_phys_addr;
141 uint64_t memory_size;
142 uint64_t userspace_addr;
143 uint64_t mmap_offset;
144 } VhostUserMemoryRegion;
145
146 typedef struct VhostUserMemory {
147 uint32_t nregions;
148 uint32_t padding;
149 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
150 } VhostUserMemory;
151
152 typedef struct VhostUserMemRegMsg {
153 uint64_t padding;
154 VhostUserMemoryRegion region;
155 } VhostUserMemRegMsg;
156
157 typedef struct VhostUserLog {
158 uint64_t mmap_size;
159 uint64_t mmap_offset;
160 } VhostUserLog;
161
162 typedef struct VhostUserConfig {
163 uint32_t offset;
164 uint32_t size;
165 uint32_t flags;
166 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
167 } VhostUserConfig;
168
169 #define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN 512
170 #define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN 64
171
172 typedef struct VhostUserCryptoSession {
173 /* session id for success, -1 on errors */
174 int64_t session_id;
175 CryptoDevBackendSymSessionInfo session_setup_data;
176 uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
177 uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
178 } VhostUserCryptoSession;
179
180 static VhostUserConfig c __attribute__ ((unused));
181 #define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
182 + sizeof(c.size) \
183 + sizeof(c.flags))
184
185 typedef struct VhostUserVringArea {
186 uint64_t u64;
187 uint64_t size;
188 uint64_t offset;
189 } VhostUserVringArea;
190
191 typedef struct VhostUserInflight {
192 uint64_t mmap_size;
193 uint64_t mmap_offset;
194 uint16_t num_queues;
195 uint16_t queue_size;
196 } VhostUserInflight;
197
198 typedef struct {
199 VhostUserRequest request;
200
201 #define VHOST_USER_VERSION_MASK (0x3)
202 #define VHOST_USER_REPLY_MASK (0x1<<2)
203 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
204 uint32_t flags;
205 uint32_t size; /* the following payload size */
206 } QEMU_PACKED VhostUserHeader;
207
208 typedef union {
209 #define VHOST_USER_VRING_IDX_MASK (0xff)
210 #define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
211 uint64_t u64;
212 struct vhost_vring_state state;
213 struct vhost_vring_addr addr;
214 VhostUserMemory memory;
215 VhostUserMemRegMsg mem_reg;
216 VhostUserLog log;
217 struct vhost_iotlb_msg iotlb;
218 VhostUserConfig config;
219 VhostUserCryptoSession session;
220 VhostUserVringArea area;
221 VhostUserInflight inflight;
222 } VhostUserPayload;
223
224 typedef struct VhostUserMsg {
225 VhostUserHeader hdr;
226 VhostUserPayload payload;
227 } QEMU_PACKED VhostUserMsg;
228
229 static VhostUserMsg m __attribute__ ((unused));
230 #define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
231
232 #define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
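/*
 * On the wire, a message is the packed VhostUserHeader immediately
 * followed by hdr.size bytes of payload; only the bytes of the union
 * member actually in use are sent, never the full VhostUserPayload.
 * vhost_user_write() below therefore transmits
 * VHOST_USER_HDR_SIZE + msg->hdr.size bytes.
 */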
233
234 /* The version of the protocol we support */
235 #define VHOST_USER_VERSION (0x1)
236
237 struct vhost_user {
238 struct vhost_dev *dev;
239 /* Shared between vhost devs of the same virtio device */
240 VhostUserState *user;
241 QIOChannel *slave_ioc;
242 GSource *slave_src;
243 NotifierWithReturn postcopy_notifier;
244 struct PostCopyFD postcopy_fd;
245 uint64_t postcopy_client_bases[VHOST_USER_MAX_RAM_SLOTS];
246 /* Length of the region_rb and region_rb_offset arrays */
247 size_t region_rb_len;
248 /* RAMBlock associated with a given region */
249 RAMBlock **region_rb;
250 /* The offset from the start of the RAMBlock to the start of the
251 * vhost region.
252 */
253 ram_addr_t *region_rb_offset;
254
255 /* True once we've entered postcopy_listen */
256 bool postcopy_listen;
257
258 /* Our current regions */
259 int num_shadow_regions;
260 struct vhost_memory_region shadow_regions[VHOST_USER_MAX_RAM_SLOTS];
261 };
262
263 struct scrub_regions {
264 struct vhost_memory_region *region;
265 int reg_idx;
266 int fd_idx;
267 };
268
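/*
 * Whether kick/call eventfds can be handed to the backend: always true
 * without KVM, otherwise only if KVM supports ioeventfds. When false,
 * vhost_set_vring_file() sets VHOST_USER_VRING_NOFD_MASK instead of
 * passing a descriptor.
 */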
269 static bool ioeventfd_enabled(void)
270 {
271 return !kvm_enabled() || kvm_eventfds_enabled();
272 }
273
274 static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
275 {
276 struct vhost_user *u = dev->opaque;
277 CharBackend *chr = u->user->chr;
278 uint8_t *p = (uint8_t *) msg;
279 int r, size = VHOST_USER_HDR_SIZE;
280
281 r = qemu_chr_fe_read_all(chr, p, size);
282 if (r != size) {
283 int saved_errno = errno;
284 error_report("Failed to read msg header. Read %d instead of %d."
285 " Original request %d.", r, size, msg->hdr.request);
286 return r < 0 ? -saved_errno : -EIO;
287 }
288
289 /* validate received flags */
290 if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
291 error_report("Failed to read msg header."
292 " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
293 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
294 return -EPROTO;
295 }
296
297 return 0;
298 }
299
300 struct vhost_user_read_cb_data {
301 struct vhost_dev *dev;
302 VhostUserMsg *msg;
303 GMainLoop *loop;
304 int ret;
305 };
306
307 static gboolean vhost_user_read_cb(void *do_not_use, GIOCondition condition,
308 gpointer opaque)
309 {
310 struct vhost_user_read_cb_data *data = opaque;
311 struct vhost_dev *dev = data->dev;
312 VhostUserMsg *msg = data->msg;
313 struct vhost_user *u = dev->opaque;
314 CharBackend *chr = u->user->chr;
315 uint8_t *p = (uint8_t *) msg;
316 int r, size;
317
318 r = vhost_user_read_header(dev, msg);
319 if (r < 0) {
320 data->ret = r;
321 goto end;
322 }
323
324 /* validate message size is sane */
325 if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
326 error_report("Failed to read msg header."
327 " Size %d exceeds the maximum %zu.", msg->hdr.size,
328 VHOST_USER_PAYLOAD_SIZE);
329 data->ret = -EPROTO;
330 goto end;
331 }
332
333 if (msg->hdr.size) {
334 p += VHOST_USER_HDR_SIZE;
335 size = msg->hdr.size;
336 r = qemu_chr_fe_read_all(chr, p, size);
337 if (r != size) {
338 int saved_errno = errno;
339 error_report("Failed to read msg payload."
340 " Read %d instead of %d.", r, msg->hdr.size);
341 data->ret = r < 0 ? -saved_errno : -EIO;
342 goto end;
343 }
344 }
345
346 end:
347 g_main_loop_quit(data->loop);
348 return G_SOURCE_REMOVE;
349 }
350
351 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
352 gpointer opaque);
353
354 /*
355 * This updates the read handler to use a new event loop context.
356 * Event sources are removed from the previous context: this ensures
357 * that events detected in the previous context are purged. They will
358 * be re-detected and processed in the new context.
359 */
360 static void slave_update_read_handler(struct vhost_dev *dev,
361 GMainContext *ctxt)
362 {
363 struct vhost_user *u = dev->opaque;
364
365 if (!u->slave_ioc) {
366 return;
367 }
368
369 if (u->slave_src) {
370 g_source_destroy(u->slave_src);
371 g_source_unref(u->slave_src);
372 }
373
374 u->slave_src = qio_channel_add_watch_source(u->slave_ioc,
375 G_IO_IN | G_IO_HUP,
376 slave_read, dev, NULL,
377 ctxt);
378 }
379
380 static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
381 {
382 struct vhost_user *u = dev->opaque;
383 CharBackend *chr = u->user->chr;
384 GMainContext *prev_ctxt = chr->chr->gcontext;
385 GMainContext *ctxt = g_main_context_new();
386 GMainLoop *loop = g_main_loop_new(ctxt, FALSE);
387 struct vhost_user_read_cb_data data = {
388 .dev = dev,
389 .loop = loop,
390 .msg = msg,
391 .ret = 0
392 };
393
394 /*
395 * We want to be able to monitor the slave channel fd while waiting
396 * for chr I/O. This requires an event loop, but we can't nest the
397 * one to which chr is currently attached: its fd handlers might not
398 * be prepared for re-entrancy. So we create a new one and switch chr
399 * to use it.
400 */
401 slave_update_read_handler(dev, ctxt);
402 qemu_chr_be_update_read_handlers(chr->chr, ctxt);
403 qemu_chr_fe_add_watch(chr, G_IO_IN | G_IO_HUP, vhost_user_read_cb, &data);
404
405 g_main_loop_run(loop);
406
407 /*
408 * Restore the previous event loop context. This also destroys/recreates
409 * event sources : this guarantees that all pending events in the original
410 * context that have been processed by the nested loop are purged.
411 */
412 qemu_chr_be_update_read_handlers(chr->chr, prev_ctxt);
413 slave_update_read_handler(dev, NULL);
414
415 g_main_loop_unref(loop);
416 g_main_context_unref(ctxt);
417
418 return data.ret;
419 }
420
421 static int process_message_reply(struct vhost_dev *dev,
422 const VhostUserMsg *msg)
423 {
424 int ret;
425 VhostUserMsg msg_reply;
426
427 if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
428 return 0;
429 }
430
431 ret = vhost_user_read(dev, &msg_reply);
432 if (ret < 0) {
433 return ret;
434 }
435
436 if (msg_reply.hdr.request != msg->hdr.request) {
437 error_report("Received unexpected msg type. "
438 "Expected %d received %d",
439 msg->hdr.request, msg_reply.hdr.request);
440 return -EPROTO;
441 }
442
443 return msg_reply.payload.u64 ? -EIO : 0;
444 }
445
446 static bool vhost_user_one_time_request(VhostUserRequest request)
447 {
448 switch (request) {
449 case VHOST_USER_SET_OWNER:
450 case VHOST_USER_RESET_OWNER:
451 case VHOST_USER_SET_MEM_TABLE:
452 case VHOST_USER_GET_QUEUE_NUM:
453 case VHOST_USER_NET_SET_MTU:
454 return true;
455 default:
456 return false;
457 }
458 }
459
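/*
 * Send one request to the backend. Any descriptors in @fds are attached
 * to the message as ancillary data (SCM_RIGHTS over the chardev's unix
 * socket) before the header and hdr.size bytes of payload are written.
 */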
460 /* most non-init callers ignore the error */
461 static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
462 int *fds, int fd_num)
463 {
464 struct vhost_user *u = dev->opaque;
465 CharBackend *chr = u->user->chr;
466 int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
467
468 /*
469 * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
470 * we only need to send it once, the first time. Any later such
471 * request is simply ignored.
472 */
473 if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
474 msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
475 return 0;
476 }
477
478 if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
479 error_report("Failed to set msg fds.");
480 return -EINVAL;
481 }
482
483 ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
484 if (ret != size) {
485 int saved_errno = errno;
486 error_report("Failed to write msg."
487 " Wrote %d instead of %d.", ret, size);
488 return ret < 0 ? -saved_errno : -EIO;
489 }
490
491 return 0;
492 }
493
494 int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
495 {
496 VhostUserMsg msg = {
497 .hdr.request = VHOST_USER_GPU_SET_SOCKET,
498 .hdr.flags = VHOST_USER_VERSION,
499 };
500
501 return vhost_user_write(dev, &msg, &fd, 1);
502 }
503
504 static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
505 struct vhost_log *log)
506 {
507 int fds[VHOST_USER_MAX_RAM_SLOTS];
508 size_t fd_num = 0;
509 bool shmfd = virtio_has_feature(dev->protocol_features,
510 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
511 int ret;
512 VhostUserMsg msg = {
513 .hdr.request = VHOST_USER_SET_LOG_BASE,
514 .hdr.flags = VHOST_USER_VERSION,
515 .payload.log.mmap_size = log->size * sizeof(*(log->log)),
516 .payload.log.mmap_offset = 0,
517 .hdr.size = sizeof(msg.payload.log),
518 };
519
520 if (shmfd && log->fd != -1) {
521 fds[fd_num++] = log->fd;
522 }
523
524 ret = vhost_user_write(dev, &msg, fds, fd_num);
525 if (ret < 0) {
526 return ret;
527 }
528
529 if (shmfd) {
530 msg.hdr.size = 0;
531 ret = vhost_user_read(dev, &msg);
532 if (ret < 0) {
533 return ret;
534 }
535
536 if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
537 error_report("Received unexpected msg type. "
538 "Expected %d received %d",
539 VHOST_USER_SET_LOG_BASE, msg.hdr.request);
540 return -EPROTO;
541 }
542 }
543
544 return 0;
545 }
546
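/*
 * Map a vhost userspace address back onto the MemoryRegion that backs
 * it, returning the offset of the address within that region and the
 * region's mmap-able file descriptor (-1 if it has none).
 */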
547 static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
548 int *fd)
549 {
550 MemoryRegion *mr;
551
552 assert((uintptr_t)addr == addr);
553 mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
554 *fd = memory_region_get_fd(mr);
555
556 return mr;
557 }
558
559 static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
560 struct vhost_memory_region *src,
561 uint64_t mmap_offset)
562 {
563 assert(src != NULL && dst != NULL);
564 dst->userspace_addr = src->userspace_addr;
565 dst->memory_size = src->memory_size;
566 dst->guest_phys_addr = src->guest_phys_addr;
567 dst->mmap_offset = mmap_offset;
568 }
569
570 static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
571 struct vhost_dev *dev,
572 VhostUserMsg *msg,
573 int *fds, size_t *fd_num,
574 bool track_ramblocks)
575 {
576 int i, fd;
577 ram_addr_t offset;
578 MemoryRegion *mr;
579 struct vhost_memory_region *reg;
580 VhostUserMemoryRegion region_buffer;
581
582 msg->hdr.request = VHOST_USER_SET_MEM_TABLE;
583
584 for (i = 0; i < dev->mem->nregions; ++i) {
585 reg = dev->mem->regions + i;
586
587 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
588 if (fd > 0) {
589 if (track_ramblocks) {
590 assert(*fd_num < VHOST_MEMORY_BASELINE_NREGIONS);
591 trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
592 reg->memory_size,
593 reg->guest_phys_addr,
594 reg->userspace_addr,
595 offset);
596 u->region_rb_offset[i] = offset;
597 u->region_rb[i] = mr->ram_block;
598 } else if (*fd_num == VHOST_MEMORY_BASELINE_NREGIONS) {
599 error_report("Failed preparing vhost-user memory table msg");
600 return -ENOBUFS;
601 }
602 vhost_user_fill_msg_region(&region_buffer, reg, offset);
603 msg->payload.memory.regions[*fd_num] = region_buffer;
604 fds[(*fd_num)++] = fd;
605 } else if (track_ramblocks) {
606 u->region_rb_offset[i] = 0;
607 u->region_rb[i] = NULL;
608 }
609 }
610
611 msg->payload.memory.nregions = *fd_num;
612
613 if (!*fd_num) {
614 error_report("Failed initializing vhost-user memory map, "
615 "consider using -object memory-backend-file share=on");
616 return -EINVAL;
617 }
618
619 msg->hdr.size = sizeof(msg->payload.memory.nregions);
620 msg->hdr.size += sizeof(msg->payload.memory.padding);
621 msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);
622
623 return 0;
624 }
625
626 static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
627 struct vhost_memory_region *vdev_reg)
628 {
629 return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
630 shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
631 shadow_reg->memory_size == vdev_reg->memory_size;
632 }
633
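/*
 * Compare the device's current memory map against our shadow copy and
 * build two lists: regions to add and regions to remove. When
 * track_ramblocks is set, shadow_pcb collects the postcopy client base
 * addresses that survive the update.
 */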
634 static void scrub_shadow_regions(struct vhost_dev *dev,
635 struct scrub_regions *add_reg,
636 int *nr_add_reg,
637 struct scrub_regions *rem_reg,
638 int *nr_rem_reg, uint64_t *shadow_pcb,
639 bool track_ramblocks)
640 {
641 struct vhost_user *u = dev->opaque;
642 bool found[VHOST_USER_MAX_RAM_SLOTS] = {};
643 struct vhost_memory_region *reg, *shadow_reg;
644 int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
645 ram_addr_t offset;
646 MemoryRegion *mr;
647 bool matching;
648
649 /*
650 * Find memory regions present in our shadow state which are not in
651 * the device's current memory state.
652 *
653 * Mark regions in both the shadow and device state as "found".
654 */
655 for (i = 0; i < u->num_shadow_regions; i++) {
656 shadow_reg = &u->shadow_regions[i];
657 matching = false;
658
659 for (j = 0; j < dev->mem->nregions; j++) {
660 reg = &dev->mem->regions[j];
661
662 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
663
664 if (reg_equal(shadow_reg, reg)) {
665 matching = true;
666 found[j] = true;
667 if (track_ramblocks) {
668 /*
669 * Reset postcopy client bases, region_rb, and
670 * region_rb_offset in case regions are removed.
671 */
672 if (fd > 0) {
673 u->region_rb_offset[j] = offset;
674 u->region_rb[j] = mr->ram_block;
675 shadow_pcb[j] = u->postcopy_client_bases[i];
676 } else {
677 u->region_rb_offset[j] = 0;
678 u->region_rb[j] = NULL;
679 }
680 }
681 break;
682 }
683 }
684
685 /*
686 * If the region was not found in the current device memory state,
687 * create an entry for it in the removed list.
688 */
689 if (!matching) {
690 rem_reg[rm_idx].region = shadow_reg;
691 rem_reg[rm_idx++].reg_idx = i;
692 }
693 }
694
695 /*
696 * For regions not marked "found", create entries in the added list.
697 *
698 * Note their indexes in the device memory state and the indexes of their
699 * file descriptors.
700 */
701 for (i = 0; i < dev->mem->nregions; i++) {
702 reg = &dev->mem->regions[i];
703 vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
704 if (fd > 0) {
705 ++fd_num;
706 }
707
708 /*
709 * If the region was in both the shadow and device state we don't
710 * need to send a VHOST_USER_ADD_MEM_REG message for it.
711 */
712 if (found[i]) {
713 continue;
714 }
715
716 add_reg[add_idx].region = reg;
717 add_reg[add_idx].reg_idx = i;
718 add_reg[add_idx++].fd_idx = fd_num;
719 }
720 *nr_rem_reg = rm_idx;
721 *nr_add_reg = add_idx;
722
723 return;
724 }
725
726 static int send_remove_regions(struct vhost_dev *dev,
727 struct scrub_regions *remove_reg,
728 int nr_rem_reg, VhostUserMsg *msg,
729 bool reply_supported)
730 {
731 struct vhost_user *u = dev->opaque;
732 struct vhost_memory_region *shadow_reg;
733 int i, fd, shadow_reg_idx, ret;
734 ram_addr_t offset;
735 VhostUserMemoryRegion region_buffer;
736
737 /*
738 * The regions in remove_reg appear in the same order they do in the
739 * shadow table. Therefore we can minimize memory copies by iterating
740 * through remove_reg backwards.
741 */
742 for (i = nr_rem_reg - 1; i >= 0; i--) {
743 shadow_reg = remove_reg[i].region;
744 shadow_reg_idx = remove_reg[i].reg_idx;
745
746 vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);
747
748 if (fd > 0) {
749 msg->hdr.request = VHOST_USER_REM_MEM_REG;
750 vhost_user_fill_msg_region(&region_buffer, shadow_reg, 0);
751 msg->payload.mem_reg.region = region_buffer;
752
753 ret = vhost_user_write(dev, msg, &fd, 1);
754 if (ret < 0) {
755 return ret;
756 }
757
758 if (reply_supported) {
759 ret = process_message_reply(dev, msg);
760 if (ret) {
761 return ret;
762 }
763 }
764 }
765
766 /*
767 * At this point we know the backend has unmapped the region. It is now
768 * safe to remove it from the shadow table.
769 */
770 memmove(&u->shadow_regions[shadow_reg_idx],
771 &u->shadow_regions[shadow_reg_idx + 1],
772 sizeof(struct vhost_memory_region) *
773 (u->num_shadow_regions - shadow_reg_idx - 1));
774 u->num_shadow_regions--;
775 }
776
777 return 0;
778 }
779
780 static int send_add_regions(struct vhost_dev *dev,
781 struct scrub_regions *add_reg, int nr_add_reg,
782 VhostUserMsg *msg, uint64_t *shadow_pcb,
783 bool reply_supported, bool track_ramblocks)
784 {
785 struct vhost_user *u = dev->opaque;
786 int i, fd, ret, reg_idx, reg_fd_idx;
787 struct vhost_memory_region *reg;
788 MemoryRegion *mr;
789 ram_addr_t offset;
790 VhostUserMsg msg_reply;
791 VhostUserMemoryRegion region_buffer;
792
793 for (i = 0; i < nr_add_reg; i++) {
794 reg = add_reg[i].region;
795 reg_idx = add_reg[i].reg_idx;
796 reg_fd_idx = add_reg[i].fd_idx;
797
798 mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
799
800 if (fd > 0) {
801 if (track_ramblocks) {
802 trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
803 reg->memory_size,
804 reg->guest_phys_addr,
805 reg->userspace_addr,
806 offset);
807 u->region_rb_offset[reg_idx] = offset;
808 u->region_rb[reg_idx] = mr->ram_block;
809 }
810 msg->hdr.request = VHOST_USER_ADD_MEM_REG;
811 vhost_user_fill_msg_region(&region_buffer, reg, offset);
812 msg->payload.mem_reg.region = region_buffer;
813
814 ret = vhost_user_write(dev, msg, &fd, 1);
815 if (ret < 0) {
816 return ret;
817 }
818
819 if (track_ramblocks) {
820 uint64_t reply_gpa;
821
822 ret = vhost_user_read(dev, &msg_reply);
823 if (ret < 0) {
824 return ret;
825 }
826
827 reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;
828
829 if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
830 error_report("%s: Received unexpected msg type. "
831 "Expected %d received %d", __func__,
832 VHOST_USER_ADD_MEM_REG,
833 msg_reply.hdr.request);
834 return -EPROTO;
835 }
836
837 /*
838 * We're using the same structure, just reusing one of the
839 * fields, so it should be the same size.
840 */
841 if (msg_reply.hdr.size != msg->hdr.size) {
842 error_report("%s: Unexpected size for postcopy reply "
843 "%d vs %d", __func__, msg_reply.hdr.size,
844 msg->hdr.size);
845 return -EPROTO;
846 }
847
848 /* Get the postcopy client base from the backend's reply. */
849 if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
850 shadow_pcb[reg_idx] =
851 msg_reply.payload.mem_reg.region.userspace_addr;
852 trace_vhost_user_set_mem_table_postcopy(
853 msg_reply.payload.mem_reg.region.userspace_addr,
854 msg->payload.mem_reg.region.userspace_addr,
855 reg_fd_idx, reg_idx);
856 } else {
857 error_report("%s: invalid postcopy reply for region. "
858 "Got guest physical address %" PRIX64 ", expected "
859 "%" PRIX64, __func__, reply_gpa,
860 dev->mem->regions[reg_idx].guest_phys_addr);
861 return -EPROTO;
862 }
863 } else if (reply_supported) {
864 ret = process_message_reply(dev, msg);
865 if (ret) {
866 return ret;
867 }
868 }
869 } else if (track_ramblocks) {
870 u->region_rb_offset[reg_idx] = 0;
871 u->region_rb[reg_idx] = NULL;
872 }
873
874 /*
875 * At this point, we know the backend has mapped in the new
876 * region, if the region has a valid file descriptor.
877 *
878 * The region should now be added to the shadow table.
879 */
880 u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
881 reg->guest_phys_addr;
882 u->shadow_regions[u->num_shadow_regions].userspace_addr =
883 reg->userspace_addr;
884 u->shadow_regions[u->num_shadow_regions].memory_size =
885 reg->memory_size;
886 u->num_shadow_regions++;
887 }
888
889 return 0;
890 }
891
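/*
 * Incremental memory table update, used when the backend supports
 * VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS: regions that disappeared
 * from the device's memory map are retired with VHOST_USER_REM_MEM_REG
 * and new ones announced with VHOST_USER_ADD_MEM_REG, instead of
 * resending the whole table.
 */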
892 static int vhost_user_add_remove_regions(struct vhost_dev *dev,
893 VhostUserMsg *msg,
894 bool reply_supported,
895 bool track_ramblocks)
896 {
897 struct vhost_user *u = dev->opaque;
898 struct scrub_regions add_reg[VHOST_USER_MAX_RAM_SLOTS];
899 struct scrub_regions rem_reg[VHOST_USER_MAX_RAM_SLOTS];
900 uint64_t shadow_pcb[VHOST_USER_MAX_RAM_SLOTS] = {};
901 int nr_add_reg, nr_rem_reg;
902 int ret;
903
904 msg->hdr.size = sizeof(msg->payload.mem_reg);
905
906 /* Find the regions which need to be removed or added. */
907 scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
908 shadow_pcb, track_ramblocks);
909
910 if (nr_rem_reg) {
911 ret = send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
912 reply_supported);
913 if (ret < 0) {
914 goto err;
915 }
916 }
917
918 if (nr_add_reg) {
919 ret = send_add_regions(dev, add_reg, nr_add_reg, msg, shadow_pcb,
920 reply_supported, track_ramblocks);
921 if (ret < 0) {
922 goto err;
923 }
924 }
925
926 if (track_ramblocks) {
927 memcpy(u->postcopy_client_bases, shadow_pcb,
928 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
929 /*
930 * Now that we've registered this with the postcopy code, we ack to the
931 * client, because we are now in a position to deal with any faults
932 * it generates.
933 */
934 /* TODO: Use this for failure cases as well with a bad value. */
935 msg->hdr.size = sizeof(msg->payload.u64);
936 msg->payload.u64 = 0; /* OK */
937
938 ret = vhost_user_write(dev, msg, NULL, 0);
939 if (ret < 0) {
940 return ret;
941 }
942 }
943
944 return 0;
945
946 err:
947 if (track_ramblocks) {
948 memcpy(u->postcopy_client_bases, shadow_pcb,
949 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
950 }
951
952 return ret;
953 }
954
955 static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
956 struct vhost_memory *mem,
957 bool reply_supported,
958 bool config_mem_slots)
959 {
960 struct vhost_user *u = dev->opaque;
961 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
962 size_t fd_num = 0;
963 VhostUserMsg msg_reply;
964 int region_i, msg_i;
965 int ret;
966
967 VhostUserMsg msg = {
968 .hdr.flags = VHOST_USER_VERSION,
969 };
970
971 if (u->region_rb_len < dev->mem->nregions) {
972 u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
973 u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
974 dev->mem->nregions);
975 memset(&(u->region_rb[u->region_rb_len]), '\0',
976 sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
977 memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
978 sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
979 u->region_rb_len = dev->mem->nregions;
980 }
981
982 if (config_mem_slots) {
983 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, true);
984 if (ret < 0) {
985 return ret;
986 }
987 } else {
988 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
989 true);
990 if (ret < 0) {
991 return ret;
992 }
993
994 ret = vhost_user_write(dev, &msg, fds, fd_num);
995 if (ret < 0) {
996 return ret;
997 }
998
999 ret = vhost_user_read(dev, &msg_reply);
1000 if (ret < 0) {
1001 return ret;
1002 }
1003
1004 if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
1005 error_report("%s: Received unexpected msg type. "
1006 "Expected %d received %d", __func__,
1007 VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
1008 return -EPROTO;
1009 }
1010
1011 /*
1012 * We're using the same structure, just reusing one of the
1013 * fields, so it should be the same size.
1014 */
1015 if (msg_reply.hdr.size != msg.hdr.size) {
1016 error_report("%s: Unexpected size for postcopy reply "
1017 "%d vs %d", __func__, msg_reply.hdr.size,
1018 msg.hdr.size);
1019 return -EPROTO;
1020 }
1021
1022 memset(u->postcopy_client_bases, 0,
1023 sizeof(uint64_t) * VHOST_USER_MAX_RAM_SLOTS);
1024
1025 /*
1026 * They're in the same order as the regions that were sent,
1027 * but some of the regions were skipped (above) if they
1028 * didn't have fds.
1029 */
1030 for (msg_i = 0, region_i = 0;
1031 region_i < dev->mem->nregions;
1032 region_i++) {
1033 if (msg_i < fd_num &&
1034 msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
1035 dev->mem->regions[region_i].guest_phys_addr) {
1036 u->postcopy_client_bases[region_i] =
1037 msg_reply.payload.memory.regions[msg_i].userspace_addr;
1038 trace_vhost_user_set_mem_table_postcopy(
1039 msg_reply.payload.memory.regions[msg_i].userspace_addr,
1040 msg.payload.memory.regions[msg_i].userspace_addr,
1041 msg_i, region_i);
1042 msg_i++;
1043 }
1044 }
1045 if (msg_i != fd_num) {
1046 error_report("%s: postcopy reply not fully consumed "
1047 "%d vs %zd",
1048 __func__, msg_i, fd_num);
1049 return -EIO;
1050 }
1051
1052 /*
1053 * Now that we've registered this with the postcopy code, we ack to the
1054 * client, because we are now in a position to deal with any faults
1055 * it generates.
1056 */
1057 /* TODO: Use this for failure cases as well with a bad value. */
1058 msg.hdr.size = sizeof(msg.payload.u64);
1059 msg.payload.u64 = 0; /* OK */
1060 ret = vhost_user_write(dev, &msg, NULL, 0);
1061 if (ret < 0) {
1062 return ret;
1063 }
1064 }
1065
1066 return 0;
1067 }
1068
1069 static int vhost_user_set_mem_table(struct vhost_dev *dev,
1070 struct vhost_memory *mem)
1071 {
1072 struct vhost_user *u = dev->opaque;
1073 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
1074 size_t fd_num = 0;
1075 bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
1076 bool reply_supported = virtio_has_feature(dev->protocol_features,
1077 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1078 bool config_mem_slots =
1079 virtio_has_feature(dev->protocol_features,
1080 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);
1081 int ret;
1082
1083 if (do_postcopy) {
1084 /*
1085 * Postcopy has enough differences that it's best done in its own
1086 * version
1087 */
1088 return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
1089 config_mem_slots);
1090 }
1091
1092 VhostUserMsg msg = {
1093 .hdr.flags = VHOST_USER_VERSION,
1094 };
1095
1096 if (reply_supported) {
1097 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1098 }
1099
1100 if (config_mem_slots) {
1101 ret = vhost_user_add_remove_regions(dev, &msg, reply_supported, false);
1102 if (ret < 0) {
1103 return ret;
1104 }
1105 } else {
1106 ret = vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
1107 false);
1108 if (ret < 0) {
1109 return ret;
1110 }
1111
1112 ret = vhost_user_write(dev, &msg, fds, fd_num);
1113 if (ret < 0) {
1114 return ret;
1115 }
1116
1117 if (reply_supported) {
1118 return process_message_reply(dev, &msg);
1119 }
1120 }
1121
1122 return 0;
1123 }
1124
1125 static int vhost_user_set_vring_endian(struct vhost_dev *dev,
1126 struct vhost_vring_state *ring)
1127 {
1128 bool cross_endian = virtio_has_feature(dev->protocol_features,
1129 VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
1130 VhostUserMsg msg = {
1131 .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
1132 .hdr.flags = VHOST_USER_VERSION,
1133 .payload.state = *ring,
1134 .hdr.size = sizeof(msg.payload.state),
1135 };
1136
1137 if (!cross_endian) {
1138 error_report("vhost-user trying to send unhandled ioctl");
1139 return -ENOTSUP;
1140 }
1141
1142 return vhost_user_write(dev, &msg, NULL, 0);
1143 }
1144
1145 static int vhost_set_vring(struct vhost_dev *dev,
1146 unsigned long int request,
1147 struct vhost_vring_state *ring)
1148 {
1149 VhostUserMsg msg = {
1150 .hdr.request = request,
1151 .hdr.flags = VHOST_USER_VERSION,
1152 .payload.state = *ring,
1153 .hdr.size = sizeof(msg.payload.state),
1154 };
1155
1156 return vhost_user_write(dev, &msg, NULL, 0);
1157 }
1158
1159 static int vhost_user_set_vring_num(struct vhost_dev *dev,
1160 struct vhost_vring_state *ring)
1161 {
1162 return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
1163 }
1164
1165 static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
1166 int queue_idx)
1167 {
1168 struct vhost_user *u = dev->opaque;
1169 VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
1170 VirtIODevice *vdev = dev->vdev;
1171
1172 if (n->addr && !n->set) {
1173 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
1174 n->set = true;
1175 }
1176 }
1177
1178 static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
1179 int queue_idx)
1180 {
1181 struct vhost_user *u = dev->opaque;
1182 VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
1183 VirtIODevice *vdev = dev->vdev;
1184
1185 if (n->addr && n->set) {
1186 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
1187 n->set = false;
1188 }
1189 }
1190
1191 static int vhost_user_set_vring_base(struct vhost_dev *dev,
1192 struct vhost_vring_state *ring)
1193 {
1194 vhost_user_host_notifier_restore(dev, ring->index);
1195
1196 return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
1197 }
1198
1199 static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
1200 {
1201 int i;
1202
1203 if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1204 return -EINVAL;
1205 }
1206
1207 for (i = 0; i < dev->nvqs; ++i) {
1208 int ret;
1209 struct vhost_vring_state state = {
1210 .index = dev->vq_index + i,
1211 .num = enable,
1212 };
1213
1214 ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
1215 if (ret < 0) {
1216 /*
1217 * Restoring the previous state is likely infeasible, as is
1218 * proceeding regardless of the error, so just bail out and hope
1219 * for device-level recovery.
1220 */
1221 return ret;
1222 }
1223 }
1224
1225 return 0;
1226 }
1227
1228 static int vhost_user_get_vring_base(struct vhost_dev *dev,
1229 struct vhost_vring_state *ring)
1230 {
1231 int ret;
1232 VhostUserMsg msg = {
1233 .hdr.request = VHOST_USER_GET_VRING_BASE,
1234 .hdr.flags = VHOST_USER_VERSION,
1235 .payload.state = *ring,
1236 .hdr.size = sizeof(msg.payload.state),
1237 };
1238
1239 vhost_user_host_notifier_remove(dev, ring->index);
1240
1241 ret = vhost_user_write(dev, &msg, NULL, 0);
1242 if (ret < 0) {
1243 return ret;
1244 }
1245
1246 ret = vhost_user_read(dev, &msg);
1247 if (ret < 0) {
1248 return ret;
1249 }
1250
1251 if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
1252 error_report("Received unexpected msg type. Expected %d received %d",
1253 VHOST_USER_GET_VRING_BASE, msg.hdr.request);
1254 return -EPROTO;
1255 }
1256
1257 if (msg.hdr.size != sizeof(msg.payload.state)) {
1258 error_report("Received bad msg size.");
1259 return -EPROTO;
1260 }
1261
1262 *ring = msg.payload.state;
1263
1264 return 0;
1265 }
1266
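/*
 * Common helper for SET_VRING_KICK/CALL. The low byte of the u64
 * payload carries the vring index; when no eventfd can be passed,
 * VHOST_USER_VRING_NOFD_MASK tells the backend to poll instead of
 * waiting on a descriptor.
 */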
1267 static int vhost_set_vring_file(struct vhost_dev *dev,
1268 VhostUserRequest request,
1269 struct vhost_vring_file *file)
1270 {
1271 int fds[VHOST_USER_MAX_RAM_SLOTS];
1272 size_t fd_num = 0;
1273 VhostUserMsg msg = {
1274 .hdr.request = request,
1275 .hdr.flags = VHOST_USER_VERSION,
1276 .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
1277 .hdr.size = sizeof(msg.payload.u64),
1278 };
1279
1280 if (ioeventfd_enabled() && file->fd > 0) {
1281 fds[fd_num++] = file->fd;
1282 } else {
1283 msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
1284 }
1285
1286 return vhost_user_write(dev, &msg, fds, fd_num);
1287 }
1288
1289 static int vhost_user_set_vring_kick(struct vhost_dev *dev,
1290 struct vhost_vring_file *file)
1291 {
1292 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
1293 }
1294
1295 static int vhost_user_set_vring_call(struct vhost_dev *dev,
1296 struct vhost_vring_file *file)
1297 {
1298 return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
1299 }
1300
1301
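/*
 * Issue a request that is answered with a u64 payload of the same
 * request type (GET_FEATURES, GET_PROTOCOL_FEATURES, GET_QUEUE_NUM,
 * GET_MAX_MEM_SLOTS, ...) and return the value in @u64.
 */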
1302 static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
1303 {
1304 int ret;
1305 VhostUserMsg msg = {
1306 .hdr.request = request,
1307 .hdr.flags = VHOST_USER_VERSION,
1308 };
1309
1310 if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
1311 return 0;
1312 }
1313
1314 ret = vhost_user_write(dev, &msg, NULL, 0);
1315 if (ret < 0) {
1316 return ret;
1317 }
1318
1319 ret = vhost_user_read(dev, &msg);
1320 if (ret < 0) {
1321 return ret;
1322 }
1323
1324 if (msg.hdr.request != request) {
1325 error_report("Received unexpected msg type. Expected %d received %d",
1326 request, msg.hdr.request);
1327 return -EPROTO;
1328 }
1329
1330 if (msg.hdr.size != sizeof(msg.payload.u64)) {
1331 error_report("Received bad msg size.");
1332 return -EPROTO;
1333 }
1334
1335 *u64 = msg.payload.u64;
1336
1337 return 0;
1338 }
1339
1340 static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
1341 {
1342 if (vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features) < 0) {
1343 return -EPROTO;
1344 }
1345
1346 return 0;
1347 }
1348
1349 static int enforce_reply(struct vhost_dev *dev,
1350 const VhostUserMsg *msg)
1351 {
1352 uint64_t dummy;
1353
1354 if (msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1355 return process_message_reply(dev, msg);
1356 }
1357
1358 /*
1359 * We need to wait for a reply but the backend does not
1360 * support replies for the command we just sent.
1361 * Send VHOST_USER_GET_FEATURES which makes all backends
1362 * send a reply.
1363 */
1364 return vhost_user_get_features(dev, &dummy);
1365 }
1366
1367 static int vhost_user_set_vring_addr(struct vhost_dev *dev,
1368 struct vhost_vring_addr *addr)
1369 {
1370 int ret;
1371 VhostUserMsg msg = {
1372 .hdr.request = VHOST_USER_SET_VRING_ADDR,
1373 .hdr.flags = VHOST_USER_VERSION,
1374 .payload.addr = *addr,
1375 .hdr.size = sizeof(msg.payload.addr),
1376 };
1377
1378 bool reply_supported = virtio_has_feature(dev->protocol_features,
1379 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1380
1381 /*
1382 * wait for a reply if logging is enabled to make sure
1383 * the backend is actually logging changes
1384 */
1385 bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG);
1386
1387 if (reply_supported && wait_for_reply) {
1388 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1389 }
1390
1391 ret = vhost_user_write(dev, &msg, NULL, 0);
1392 if (ret < 0) {
1393 return ret;
1394 }
1395
1396 if (wait_for_reply) {
1397 return enforce_reply(dev, &msg);
1398 }
1399
1400 return 0;
1401 }
1402
1403 static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64,
1404 bool wait_for_reply)
1405 {
1406 VhostUserMsg msg = {
1407 .hdr.request = request,
1408 .hdr.flags = VHOST_USER_VERSION,
1409 .payload.u64 = u64,
1410 .hdr.size = sizeof(msg.payload.u64),
1411 };
1412 int ret;
1413
1414 if (wait_for_reply) {
1415 bool reply_supported = virtio_has_feature(dev->protocol_features,
1416 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1417 if (reply_supported) {
1418 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1419 }
1420 }
1421
1422 ret = vhost_user_write(dev, &msg, NULL, 0);
1423 if (ret < 0) {
1424 return ret;
1425 }
1426
1427 if (wait_for_reply) {
1428 return enforce_reply(dev, &msg);
1429 }
1430
1431 return 0;
1432 }
1433
1434 static int vhost_user_set_features(struct vhost_dev *dev,
1435 uint64_t features)
1436 {
1437 /*
1438 * wait for a reply if logging is enabled to make sure
1439 * the backend is actually logging changes
1440 */
1441 bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL);
1442
1443 return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features,
1444 log_enabled);
1445 }
1446
1447 static int vhost_user_set_protocol_features(struct vhost_dev *dev,
1448 uint64_t features)
1449 {
1450 return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features,
1451 false);
1452 }
1453
1454 static int vhost_user_set_owner(struct vhost_dev *dev)
1455 {
1456 VhostUserMsg msg = {
1457 .hdr.request = VHOST_USER_SET_OWNER,
1458 .hdr.flags = VHOST_USER_VERSION,
1459 };
1460
1461 return vhost_user_write(dev, &msg, NULL, 0);
1462 }
1463
1464 static int vhost_user_get_max_memslots(struct vhost_dev *dev,
1465 uint64_t *max_memslots)
1466 {
1467 uint64_t backend_max_memslots;
1468 int err;
1469
1470 err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
1471 &backend_max_memslots);
1472 if (err < 0) {
1473 return err;
1474 }
1475
1476 *max_memslots = backend_max_memslots;
1477
1478 return 0;
1479 }
1480
1481 static int vhost_user_reset_device(struct vhost_dev *dev)
1482 {
1483 VhostUserMsg msg = {
1484 .hdr.flags = VHOST_USER_VERSION,
1485 };
1486
1487 msg.hdr.request = virtio_has_feature(dev->protocol_features,
1488 VHOST_USER_PROTOCOL_F_RESET_DEVICE)
1489 ? VHOST_USER_RESET_DEVICE
1490 : VHOST_USER_RESET_OWNER;
1491
1492 return vhost_user_write(dev, &msg, NULL, 0);
1493 }
1494
1495 static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
1496 {
1497 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1498 return -ENOSYS;
1499 }
1500
1501 return dev->config_ops->vhost_dev_config_notifier(dev);
1502 }
1503
1504 static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
1505 VhostUserVringArea *area,
1506 int fd)
1507 {
1508 int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
1509 size_t page_size = qemu_real_host_page_size;
1510 struct vhost_user *u = dev->opaque;
1511 VhostUserState *user = u->user;
1512 VirtIODevice *vdev = dev->vdev;
1513 VhostUserHostNotifier *n;
1514 void *addr;
1515 char *name;
1516
1517 if (!virtio_has_feature(dev->protocol_features,
1518 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
1519 vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
1520 return -EINVAL;
1521 }
1522
1523 n = &user->notifier[queue_idx];
1524
1525 if (n->addr) {
1526 virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
1527 object_unparent(OBJECT(&n->mr));
1528 munmap(n->addr, page_size);
1529 n->addr = NULL;
1530 }
1531
1532 if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
1533 return 0;
1534 }
1535
1536 /* Sanity check. */
1537 if (area->size != page_size) {
1538 return -EINVAL;
1539 }
1540
1541 addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
1542 fd, area->offset);
1543 if (addr == MAP_FAILED) {
1544 return -EFAULT;
1545 }
1546
1547 name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
1548 user, queue_idx);
1549 if (!n->mr.ram) /* Don't init again after suspend. */
1550 memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
1551 page_size, addr);
1552 g_free(name);
1553
1554 if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
1555 object_unparent(OBJECT(&n->mr));
1556 munmap(addr, page_size);
1557 return -ENXIO;
1558 }
1559
1560 n->addr = addr;
1561 n->set = true;
1562
1563 return 0;
1564 }
1565
1566 static void close_slave_channel(struct vhost_user *u)
1567 {
1568 g_source_destroy(u->slave_src);
1569 g_source_unref(u->slave_src);
1570 u->slave_src = NULL;
1571 object_unref(OBJECT(u->slave_ioc));
1572 u->slave_ioc = NULL;
1573 }
1574
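/*
 * Dispatch one backend-initiated ("slave") request received on the
 * channel created by vhost_setup_slave_channel(). If the backend set
 * VHOST_USER_NEED_REPLY_MASK, a u64 status reply (0 on success,
 * non-zero on failure) is written back on the same channel.
 */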
1575 static gboolean slave_read(QIOChannel *ioc, GIOCondition condition,
1576 gpointer opaque)
1577 {
1578 struct vhost_dev *dev = opaque;
1579 struct vhost_user *u = dev->opaque;
1580 VhostUserHeader hdr = { 0, };
1581 VhostUserPayload payload = { 0, };
1582 Error *local_err = NULL;
1583 gboolean rc = G_SOURCE_CONTINUE;
1584 int ret = 0;
1585 struct iovec iov;
1586 g_autofree int *fd = NULL;
1587 size_t fdsize = 0;
1588 int i;
1589
1590 /* Read header */
1591 iov.iov_base = &hdr;
1592 iov.iov_len = VHOST_USER_HDR_SIZE;
1593
1594 if (qio_channel_readv_full_all(ioc, &iov, 1, &fd, &fdsize, &local_err)) {
1595 error_report_err(local_err);
1596 goto err;
1597 }
1598
1599 if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
1600 error_report("Failed to read msg header."
1601 " Size %d exceeds the maximum %zu.", hdr.size,
1602 VHOST_USER_PAYLOAD_SIZE);
1603 goto err;
1604 }
1605
1606 /* Read payload */
1607 if (qio_channel_read_all(ioc, (char *) &payload, hdr.size, &local_err)) {
1608 error_report_err(local_err);
1609 goto err;
1610 }
1611
1612 switch (hdr.request) {
1613 case VHOST_USER_SLAVE_IOTLB_MSG:
1614 ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
1615 break;
1616 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
1617 ret = vhost_user_slave_handle_config_change(dev);
1618 break;
1619 case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
1620 ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
1621 fd ? fd[0] : -1);
1622 break;
1623 default:
1624 error_report("Received unexpected msg type: %d.", hdr.request);
1625 ret = -EINVAL;
1626 }
1627
1628 /*
1629 * REPLY_ACK feature handling. Other reply types have to be managed
1630 * directly in their request handlers.
1631 */
1632 if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
1633 struct iovec iovec[2];
1634
1635
1636 hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
1637 hdr.flags |= VHOST_USER_REPLY_MASK;
1638
1639 payload.u64 = !!ret;
1640 hdr.size = sizeof(payload.u64);
1641
1642 iovec[0].iov_base = &hdr;
1643 iovec[0].iov_len = VHOST_USER_HDR_SIZE;
1644 iovec[1].iov_base = &payload;
1645 iovec[1].iov_len = hdr.size;
1646
1647 if (qio_channel_writev_all(ioc, iovec, ARRAY_SIZE(iovec), &local_err)) {
1648 error_report_err(local_err);
1649 goto err;
1650 }
1651 }
1652
1653 goto fdcleanup;
1654
1655 err:
1656 close_slave_channel(u);
1657 rc = G_SOURCE_REMOVE;
1658
1659 fdcleanup:
1660 if (fd) {
1661 for (i = 0; i < fdsize; i++) {
1662 close(fd[i]);
1663 }
1664 }
1665 return rc;
1666 }
1667
1668 static int vhost_setup_slave_channel(struct vhost_dev *dev)
1669 {
1670 VhostUserMsg msg = {
1671 .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
1672 .hdr.flags = VHOST_USER_VERSION,
1673 };
1674 struct vhost_user *u = dev->opaque;
1675 int sv[2], ret = 0;
1676 bool reply_supported = virtio_has_feature(dev->protocol_features,
1677 VHOST_USER_PROTOCOL_F_REPLY_ACK);
1678 Error *local_err = NULL;
1679 QIOChannel *ioc;
1680
1681 if (!virtio_has_feature(dev->protocol_features,
1682 VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
1683 return 0;
1684 }
1685
1686 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
1687 int saved_errno = errno;
1688 error_report("socketpair() failed");
1689 return -saved_errno;
1690 }
1691
1692 ioc = QIO_CHANNEL(qio_channel_socket_new_fd(sv[0], &local_err));
1693 if (!ioc) {
1694 error_report_err(local_err);
1695 return -ECONNREFUSED;
1696 }
1697 u->slave_ioc = ioc;
1698 slave_update_read_handler(dev, NULL);
1699
1700 if (reply_supported) {
1701 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
1702 }
1703
1704 ret = vhost_user_write(dev, &msg, &sv[1], 1);
1705 if (ret) {
1706 goto out;
1707 }
1708
1709 if (reply_supported) {
1710 ret = process_message_reply(dev, &msg);
1711 }
1712
1713 out:
1714 close(sv[1]);
1715 if (ret) {
1716 close_slave_channel(u);
1717 }
1718
1719 return ret;
1720 }
1721
1722 #ifdef CONFIG_LINUX
1723 /*
1724 * Called back from the postcopy fault thread when a fault is received on our
1725 * ufd.
1726 * TODO: This is Linux specific
1727 */
1728 static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
1729 void *ufd)
1730 {
1731 struct vhost_dev *dev = pcfd->data;
1732 struct vhost_user *u = dev->opaque;
1733 struct uffd_msg *msg = ufd;
1734 uint64_t faultaddr = msg->arg.pagefault.address;
1735 RAMBlock *rb = NULL;
1736 uint64_t rb_offset;
1737 int i;
1738
1739 trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
1740 dev->mem->nregions);
1741 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1742 trace_vhost_user_postcopy_fault_handler_loop(i,
1743 u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
1744 if (faultaddr >= u->postcopy_client_bases[i]) {
1745 /* Offset of the fault address in the vhost region */
1746 uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
1747 if (region_offset < dev->mem->regions[i].memory_size) {
1748 rb_offset = region_offset + u->region_rb_offset[i];
1749 trace_vhost_user_postcopy_fault_handler_found(i,
1750 region_offset, rb_offset);
1751 rb = u->region_rb[i];
1752 return postcopy_request_shared_page(pcfd, rb, faultaddr,
1753 rb_offset);
1754 }
1755 }
1756 }
1757 error_report("%s: Failed to find region for fault %" PRIx64,
1758 __func__, faultaddr);
1759 return -1;
1760 }
1761
1762 static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
1763 uint64_t offset)
1764 {
1765 struct vhost_dev *dev = pcfd->data;
1766 struct vhost_user *u = dev->opaque;
1767 int i;
1768
1769 trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);
1770
1771 if (!u) {
1772 return 0;
1773 }
1774 /* Translate the offset into an address in the client's address space */
1775 for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
1776 if (u->region_rb[i] == rb &&
1777 offset >= u->region_rb_offset[i] &&
1778 offset < (u->region_rb_offset[i] +
1779 dev->mem->regions[i].memory_size)) {
1780 uint64_t client_addr = (offset - u->region_rb_offset[i]) +
1781 u->postcopy_client_bases[i];
1782 trace_vhost_user_postcopy_waker_found(client_addr);
1783 return postcopy_wake_shared(pcfd, client_addr, rb);
1784 }
1785 }
1786
1787 trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
1788 return 0;
1789 }
1790 #endif
1791
1792 /*
1793 * Called at the start of an inbound postcopy on reception of the
1794 * 'advise' command.
1795 */
1796 static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
1797 {
1798 #ifdef CONFIG_LINUX
1799 struct vhost_user *u = dev->opaque;
1800 CharBackend *chr = u->user->chr;
1801 int ufd;
1802 int ret;
1803 VhostUserMsg msg = {
1804 .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
1805 .hdr.flags = VHOST_USER_VERSION,
1806 };
1807
1808 ret = vhost_user_write(dev, &msg, NULL, 0);
1809 if (ret < 0) {
1810 error_setg(errp, "Failed to send postcopy_advise to vhost");
1811 return ret;
1812 }
1813
1814 ret = vhost_user_read(dev, &msg);
1815 if (ret < 0) {
1816 error_setg(errp, "Failed to get postcopy_advise reply from vhost");
1817 return ret;
1818 }
1819
1820 if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
1821 error_setg(errp, "Unexpected msg type. Expected %d received %d",
1822 VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
1823 return -EPROTO;
1824 }
1825
1826 if (msg.hdr.size) {
1827 error_setg(errp, "Received bad msg size.");
1828 return -EPROTO;
1829 }
1830 ufd = qemu_chr_fe_get_msgfd(chr);
1831 if (ufd < 0) {
1832 error_setg(errp, "%s: Failed to get ufd", __func__);
1833 return -EIO;
1834 }
1835 qemu_set_nonblock(ufd);
1836
1837 /* register ufd with userfault thread */
1838 u->postcopy_fd.fd = ufd;
1839 u->postcopy_fd.data = dev;
1840 u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
1841 u->postcopy_fd.waker = vhost_user_postcopy_waker;
1842 u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
1843 postcopy_register_shared_ufd(&u->postcopy_fd);
1844 return 0;
1845 #else
1846 error_setg(errp, "Postcopy not supported on non-Linux systems");
1847 return -ENOSYS;
1848 #endif
1849 }
1850
1851 /*
1852 * Called at the switch to postcopy on reception of the 'listen' command.
1853 */
1854 static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
1855 {
1856 struct vhost_user *u = dev->opaque;
1857 int ret;
1858 VhostUserMsg msg = {
1859 .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
1860 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1861 };
1862 u->postcopy_listen = true;
1863
1864 trace_vhost_user_postcopy_listen();
1865
1866 ret = vhost_user_write(dev, &msg, NULL, 0);
1867 if (ret < 0) {
1868 error_setg(errp, "Failed to send postcopy_listen to vhost");
1869 return ret;
1870 }
1871
1872 ret = process_message_reply(dev, &msg);
1873 if (ret) {
1874 error_setg(errp, "Failed to receive reply to postcopy_listen");
1875 return ret;
1876 }
1877
1878 return 0;
1879 }
1880
1881 /*
1882 * Called at the end of postcopy
1883 */
1884 static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
1885 {
1886 VhostUserMsg msg = {
1887 .hdr.request = VHOST_USER_POSTCOPY_END,
1888 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
1889 };
1890 int ret;
1891 struct vhost_user *u = dev->opaque;
1892
1893 trace_vhost_user_postcopy_end_entry();
1894
1895 ret = vhost_user_write(dev, &msg, NULL, 0);
1896 if (ret < 0) {
1897 error_setg(errp, "Failed to send postcopy_end to vhost");
1898 return ret;
1899 }
1900
1901 ret = process_message_reply(dev, &msg);
1902 if (ret) {
1903 error_setg(errp, "Failed to receive reply to postcopy_end");
1904 return ret;
1905 }
1906 postcopy_unregister_shared_ufd(&u->postcopy_fd);
1907 close(u->postcopy_fd.fd);
1908 u->postcopy_fd.handler = NULL;
1909
1910 trace_vhost_user_postcopy_end_exit();
1911
1912 return 0;
1913 }
1914
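/*
 * Postcopy state-change notifier: wires the inbound postcopy phases
 * (advise/listen/end) through to the backend and refuses postcopy when
 * the backend lacks VHOST_USER_PROTOCOL_F_PAGEFAULT.
 */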
1915 static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
1916 void *opaque)
1917 {
1918 struct PostcopyNotifyData *pnd = opaque;
1919 struct vhost_user *u = container_of(notifier, struct vhost_user,
1920 postcopy_notifier);
1921 struct vhost_dev *dev = u->dev;
1922
1923 switch (pnd->reason) {
1924 case POSTCOPY_NOTIFY_PROBE:
1925 if (!virtio_has_feature(dev->protocol_features,
1926 VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
1927 /* TODO: Get the device name into this error somehow */
1928 error_setg(pnd->errp,
1929 "vhost-user backend not capable of postcopy");
1930 return -ENOENT;
1931 }
1932 break;
1933
1934 case POSTCOPY_NOTIFY_INBOUND_ADVISE:
1935 return vhost_user_postcopy_advise(dev, pnd->errp);
1936
1937 case POSTCOPY_NOTIFY_INBOUND_LISTEN:
1938 return vhost_user_postcopy_listen(dev, pnd->errp);
1939
1940 case POSTCOPY_NOTIFY_INBOUND_END:
1941 return vhost_user_postcopy_end(dev, pnd->errp);
1942
1943 default:
1944 /* We ignore notifications we don't know */
1945 break;
1946 }
1947
1948 return 0;
1949 }
1950
1951 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
1952 Error **errp)
1953 {
1954 uint64_t features, protocol_features, ram_slots;
1955 struct vhost_user *u;
1956 int err;
1957
1958 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
1959
1960 u = g_new0(struct vhost_user, 1);
1961 u->user = opaque;
1962 u->dev = dev;
1963 dev->opaque = u;
1964
1965 err = vhost_user_get_features(dev, &features);
1966 if (err < 0) {
1967 error_setg_errno(errp, -err, "vhost_backend_init failed");
1968 return err;
1969 }
1970
1971 if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
1972 dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
1973
1974 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
1975 &protocol_features);
1976 if (err < 0) {
1977 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
1978 return -EPROTO;
1979 }
1980
1981 dev->protocol_features =
1982 protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;
1983
1984 if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
1985 /* Don't acknowledge CONFIG feature if device doesn't support it */
1986 dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
1987 } else if (!(protocol_features &
1988 (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
1989 error_setg(errp, "Device expects VHOST_USER_PROTOCOL_F_CONFIG "
1990 "but backend does not support it.");
1991 return -EINVAL;
1992 }
1993
1994 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
1995 if (err < 0) {
1996 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
1997 return -EPROTO;
1998 }
1999
2000 /* Query the maximum number of queues if the backend supports multiqueue */
2001 if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
2002 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
2003 &dev->max_queues);
2004 if (err < 0) {
2005 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2006 return -EPROTO;
2007 }
2008 } else {
2009 dev->max_queues = 1;
2010 }
2011
2012 if (dev->num_queues && dev->max_queues < dev->num_queues) {
2013 error_setg(errp, "The maximum number of queues supported by the "
2014 "backend is %" PRIu64, dev->max_queues);
2015 return -EINVAL;
2016 }
2017
2018 if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
2019 !(virtio_has_feature(dev->protocol_features,
2020 VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
2021 virtio_has_feature(dev->protocol_features,
2022 VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
2023 error_setg(errp, "IOMMU support requires reply-ack and "
2024 "slave-req protocol features.");
2025 return -EINVAL;
2026 }
2027
2028 /* get max memory regions if backend supports configurable RAM slots */
2029 if (!virtio_has_feature(dev->protocol_features,
2030 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
2031 u->user->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS;
2032 } else {
2033 err = vhost_user_get_max_memslots(dev, &ram_slots);
2034 if (err < 0) {
2035 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2036 return -EPROTO;
2037 }
2038
2039 if (ram_slots < u->user->memory_slots) {
2040 error_setg(errp, "The backend specified a max ram slots limit "
2041 "of %" PRIu64", when the prior validated limit was "
2042 "%d. This limit should never decrease.", ram_slots,
2043 u->user->memory_slots);
2044 return -EINVAL;
2045 }
2046
2047 u->user->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS);
2048 }
2049 }
2050
2051 if (dev->migration_blocker == NULL &&
2052 !virtio_has_feature(dev->protocol_features,
2053 VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
2054 error_setg(&dev->migration_blocker,
2055 "Migration disabled: vhost-user backend lacks "
2056 "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
2057 }
2058
2059 if (dev->vq_index == 0) {
2060 err = vhost_setup_slave_channel(dev);
2061 if (err < 0) {
2062 error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
2063 return -EPROTO;
2064 }
2065 }
2066
2067 u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
2068 postcopy_add_notifier(&u->postcopy_notifier);
2069
2070 return 0;
2071 }
2072
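/*
 * Undo vhost_user_backend_init(): drop the postcopy notifier and userfault
 * fd, close the slave channel and free the per-device vhost_user state.
 */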
2073 static int vhost_user_backend_cleanup(struct vhost_dev *dev)
2074 {
2075 struct vhost_user *u;
2076
2077 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2078
2079 u = dev->opaque;
2080 if (u->postcopy_notifier.notify) {
2081 postcopy_remove_notifier(&u->postcopy_notifier);
2082 u->postcopy_notifier.notify = NULL;
2083 }
2084 u->postcopy_listen = false;
2085 if (u->postcopy_fd.handler) {
2086 postcopy_unregister_shared_ufd(&u->postcopy_fd);
2087 close(u->postcopy_fd.fd);
2088 u->postcopy_fd.handler = NULL;
2089 }
2090 if (u->slave_ioc) {
2091 close_slave_channel(u);
2092 }
2093 g_free(u->region_rb);
2094 u->region_rb = NULL;
2095 g_free(u->region_rb_offset);
2096 u->region_rb_offset = NULL;
2097 u->region_rb_len = 0;
2098 g_free(u);
2099 dev->opaque = NULL;
2100
2101 return 0;
2102 }
2103
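/* vhost-user virtqueue indexes are absolute, so return idx after a range check */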
2104 static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
2105 {
2106 assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
2107
2108 return idx;
2109 }
2110
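/* Report the memory-slot limit established in vhost_user_backend_init() */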
2111 static int vhost_user_memslots_limit(struct vhost_dev *dev)
2112 {
2113 struct vhost_user *u = dev->opaque;
2114
2115 return u->user->memory_slots;
2116 }
2117
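/* The dirty log must live in fd-backed shared memory when LOG_SHMFD is negotiated */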
2118 static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
2119 {
2120 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2121
2122 return virtio_has_feature(dev->protocol_features,
2123 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
2124 }
2125
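/*
 * Called once migration is done: nothing to do if the guest announces its
 * own MAC via GUEST_ANNOUNCE, otherwise ask the backend to broadcast a RARP
 * for mac_addr.
 */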
2126 static int vhost_user_migration_done(struct vhost_dev *dev, char *mac_addr)
2127 {
2128 VhostUserMsg msg = { };
2129
2130 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2131
2132 /* If the guest supports GUEST_ANNOUNCE, there is nothing to do */
2133 if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
2134 return 0;
2135 }
2136
2137 /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
2138 if (virtio_has_feature(dev->protocol_features,
2139 VHOST_USER_PROTOCOL_F_RARP)) {
2140 msg.hdr.request = VHOST_USER_SEND_RARP;
2141 msg.hdr.flags = VHOST_USER_VERSION;
2142 memcpy((char *)&msg.payload.u64, mac_addr, 6);
2143 msg.hdr.size = sizeof(msg.payload.u64);
2144
2145 return vhost_user_write(dev, &msg, NULL, 0);
2146 }
2147 return -ENOTSUP;
2148 }
2149
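/* Two ranges can only be merged if they are backed by the same fd */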
2150 static bool vhost_user_can_merge(struct vhost_dev *dev,
2151 uint64_t start1, uint64_t size1,
2152 uint64_t start2, uint64_t size2)
2153 {
2154 ram_addr_t offset;
2155 int mfd, rfd;
2156
2157 (void)vhost_user_get_mr_data(start1, &offset, &mfd);
2158 (void)vhost_user_get_mr_data(start2, &offset, &rfd);
2159
2160 return mfd == rfd;
2161 }
2162
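/* Propagate the configured MTU to the backend; a no-op without F_NET_MTU */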
2163 static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
2164 {
2165 VhostUserMsg msg;
2166 bool reply_supported = virtio_has_feature(dev->protocol_features,
2167 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2168 int ret;
2169
2170 if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
2171 return 0;
2172 }
2173
2174 msg.hdr.request = VHOST_USER_NET_SET_MTU;
2175 msg.payload.u64 = mtu;
2176 msg.hdr.size = sizeof(msg.payload.u64);
2177 msg.hdr.flags = VHOST_USER_VERSION;
2178 if (reply_supported) {
2179 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2180 }
2181
2182 ret = vhost_user_write(dev, &msg, NULL, 0);
2183 if (ret < 0) {
2184 return ret;
2185 }
2186
2187 /* If reply_ack is supported, the slave must ack that the specified MTU is valid */
2188 if (reply_supported) {
2189 return process_message_reply(dev, &msg);
2190 }
2191
2192 return 0;
2193 }
2194
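/* Forward an IOTLB message to the backend and wait for its reply-ack */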
2195 static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
2196 struct vhost_iotlb_msg *imsg)
2197 {
2198 int ret;
2199 VhostUserMsg msg = {
2200 .hdr.request = VHOST_USER_IOTLB_MSG,
2201 .hdr.size = sizeof(msg.payload.iotlb),
2202 .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
2203 .payload.iotlb = *imsg,
2204 };
2205
2206 ret = vhost_user_write(dev, &msg, NULL, 0);
2207 if (ret < 0) {
2208 return ret;
2209 }
2210
2211 return process_message_reply(dev, &msg);
2212 }
2213
2214
2215 static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
2216 {
2217 /* No-op as the receive channel is not dedicated to IOTLB messages. */
2218 }
2219
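/* Read config_len bytes of the device config space from the backend */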
2220 static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
2221 uint32_t config_len, Error **errp)
2222 {
2223 int ret;
2224 VhostUserMsg msg = {
2225 .hdr.request = VHOST_USER_GET_CONFIG,
2226 .hdr.flags = VHOST_USER_VERSION,
2227 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
2228 };
2229
2230 if (!virtio_has_feature(dev->protocol_features,
2231 VHOST_USER_PROTOCOL_F_CONFIG)) {
2232 error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
2233 return -EINVAL;
2234 }
2235
2236 assert(config_len <= VHOST_USER_MAX_CONFIG_SIZE);
2237
2238 msg.payload.config.offset = 0;
2239 msg.payload.config.size = config_len;
2240 ret = vhost_user_write(dev, &msg, NULL, 0);
2241 if (ret < 0) {
2242 error_setg_errno(errp, -ret, "vhost_get_config failed");
2243 return ret;
2244 }
2245
2246 ret = vhost_user_read(dev, &msg);
2247 if (ret < 0) {
2248 error_setg_errno(errp, -ret, "vhost_get_config failed");
2249 return ret;
2250 }
2251
2252 if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
2253 error_setg(errp,
2254 "Received unexpected msg type. Expected %d received %d",
2255 VHOST_USER_GET_CONFIG, msg.hdr.request);
2256 return -EPROTO;
2257 }
2258
2259 if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
2260 error_setg(errp, "Received bad msg size.");
2261 return -EPROTO;
2262 }
2263
2264 memcpy(config, msg.payload.config.region, config_len);
2265
2266 return 0;
2267 }
2268
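/* Write size bytes at offset of the device config space to the backend */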
2269 static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
2270 uint32_t offset, uint32_t size, uint32_t flags)
2271 {
2272 int ret;
2273 uint8_t *p;
2274 bool reply_supported = virtio_has_feature(dev->protocol_features,
2275 VHOST_USER_PROTOCOL_F_REPLY_ACK);
2276
2277 VhostUserMsg msg = {
2278 .hdr.request = VHOST_USER_SET_CONFIG,
2279 .hdr.flags = VHOST_USER_VERSION,
2280 .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
2281 };
2282
2283 if (!virtio_has_feature(dev->protocol_features,
2284 VHOST_USER_PROTOCOL_F_CONFIG)) {
2285 return -ENOTSUP;
2286 }
2287
2288 if (reply_supported) {
2289 msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
2290 }
2291
2292 if (size > VHOST_USER_MAX_CONFIG_SIZE) {
2293 return -EINVAL;
2294 }
2295
2296 msg.payload.config.offset = offset;
2297 msg.payload.config.size = size;
2298 msg.payload.config.flags = flags;
2299 p = msg.payload.config.region;
2300 memcpy(p, data, size);
2301
2302 ret = vhost_user_write(dev, &msg, NULL, 0);
2303 if (ret < 0) {
2304 return ret;
2305 }
2306
2307 if (reply_supported) {
2308 return process_message_reply(dev, &msg);
2309 }
2310
2311 return 0;
2312 }
2313
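/*
 * Create a symmetric crypto session on the backend; the backend returns the
 * allocated session id in its reply.
 */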
2314 static int vhost_user_crypto_create_session(struct vhost_dev *dev,
2315 void *session_info,
2316 uint64_t *session_id)
2317 {
2318 int ret;
2319 bool crypto_session = virtio_has_feature(dev->protocol_features,
2320 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2321 CryptoDevBackendSymSessionInfo *sess_info = session_info;
2322 VhostUserMsg msg = {
2323 .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
2324 .hdr.flags = VHOST_USER_VERSION,
2325 .hdr.size = sizeof(msg.payload.session),
2326 };
2327
2328 assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
2329
2330 if (!crypto_session) {
2331 error_report("vhost-user trying to send unhandled ioctl");
2332 return -ENOTSUP;
2333 }
2334
2335 memcpy(&msg.payload.session.session_setup_data, sess_info,
2336 sizeof(CryptoDevBackendSymSessionInfo));
2337 if (sess_info->key_len) {
2338 memcpy(&msg.payload.session.key, sess_info->cipher_key,
2339 sess_info->key_len);
2340 }
2341 if (sess_info->auth_key_len > 0) {
2342 memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
2343 sess_info->auth_key_len);
2344 }
2345 ret = vhost_user_write(dev, &msg, NULL, 0);
2346 if (ret < 0) {
2347 error_report("vhost_user_write() return %d, create session failed",
2348 ret);
2349 return ret;
2350 }
2351
2352 ret = vhost_user_read(dev, &msg);
2353 if (ret < 0) {
2354 error_report("vhost_user_read() return %d, create session failed",
2355 ret);
2356 return ret;
2357 }
2358
2359 if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
2360 error_report("Received unexpected msg type. Expected %d received %d",
2361 VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
2362 return -EPROTO;
2363 }
2364
2365 if (msg.hdr.size != sizeof(msg.payload.session)) {
2366 error_report("Received bad msg size.");
2367 return -EPROTO;
2368 }
2369
2370 if (msg.payload.session.session_id < 0) {
2371 error_report("Bad session id: %" PRId64 "",
2372 msg.payload.session.session_id);
2373 return -EINVAL;
2374 }
2375 *session_id = msg.payload.session.session_id;
2376
2377 return 0;
2378 }
2379
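/* Tear down a crypto session previously created on the backend */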
2380 static int
2381 vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
2382 {
2383 int ret;
2384 bool crypto_session = virtio_has_feature(dev->protocol_features,
2385 VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
2386 VhostUserMsg msg = {
2387 .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
2388 .hdr.flags = VHOST_USER_VERSION,
2389 .hdr.size = sizeof(msg.payload.u64),
2390 };
2391 msg.payload.u64 = session_id;
2392
2393 if (!crypto_session) {
2394 error_report("vhost-user trying to send unhandled ioctl");
2395 return -ENOTSUP;
2396 }
2397
2398 ret = vhost_user_write(dev, &msg, NULL, 0);
2399 if (ret < 0) {
2400 error_report("vhost_user_write() return %d, close session failed",
2401 ret);
2402 return ret;
2403 }
2404
2405 return 0;
2406 }
2407
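/* Only fd-backed (shareable) memory sections can be handed to the backend */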
2408 static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
2409 MemoryRegionSection *section)
2410 {
2411 bool result;
2412
2413 result = memory_region_get_fd(section->mr) >= 0;
2414
2415 return result;
2416 }
2417
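/*
 * Ask the backend to allocate its inflight I/O tracking buffer and mmap it
 * locally, so pending requests can be resubmitted if the backend restarts.
 */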
2418 static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
2419 uint16_t queue_size,
2420 struct vhost_inflight *inflight)
2421 {
2422 void *addr;
2423 int fd;
2424 int ret;
2425 struct vhost_user *u = dev->opaque;
2426 CharBackend *chr = u->user->chr;
2427 VhostUserMsg msg = {
2428 .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
2429 .hdr.flags = VHOST_USER_VERSION,
2430 .payload.inflight.num_queues = dev->nvqs,
2431 .payload.inflight.queue_size = queue_size,
2432 .hdr.size = sizeof(msg.payload.inflight),
2433 };
2434
2435 if (!virtio_has_feature(dev->protocol_features,
2436 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2437 return 0;
2438 }
2439
2440 ret = vhost_user_write(dev, &msg, NULL, 0);
2441 if (ret < 0) {
2442 return ret;
2443 }
2444
2445 ret = vhost_user_read(dev, &msg);
2446 if (ret < 0) {
2447 return ret;
2448 }
2449
2450 if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
2451 error_report("Received unexpected msg type. "
2452 "Expected %d received %d",
2453 VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
2454 return -EPROTO;
2455 }
2456
2457 if (msg.hdr.size != sizeof(msg.payload.inflight)) {
2458 error_report("Received bad msg size.");
2459 return -EPROTO;
2460 }
2461
2462 if (!msg.payload.inflight.mmap_size) {
2463 return 0;
2464 }
2465
2466 fd = qemu_chr_fe_get_msgfd(chr);
2467 if (fd < 0) {
2468 error_report("Failed to get mem fd");
2469 return -EIO;
2470 }
2471
2472 addr = mmap(NULL, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
2473 MAP_SHARED, fd, msg.payload.inflight.mmap_offset);
2474
2475 if (addr == MAP_FAILED) {
2476 error_report("Failed to mmap mem fd");
2477 close(fd);
2478 return -EFAULT;
2479 }
2480
2481 inflight->addr = addr;
2482 inflight->fd = fd;
2483 inflight->size = msg.payload.inflight.mmap_size;
2484 inflight->offset = msg.payload.inflight.mmap_offset;
2485 inflight->queue_size = queue_size;
2486
2487 return 0;
2488 }
2489
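/* Hand an existing inflight buffer (fd plus layout) back to the backend */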
2490 static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
2491 struct vhost_inflight *inflight)
2492 {
2493 VhostUserMsg msg = {
2494 .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
2495 .hdr.flags = VHOST_USER_VERSION,
2496 .payload.inflight.mmap_size = inflight->size,
2497 .payload.inflight.mmap_offset = inflight->offset,
2498 .payload.inflight.num_queues = dev->nvqs,
2499 .payload.inflight.queue_size = inflight->queue_size,
2500 .hdr.size = sizeof(msg.payload.inflight),
2501 };
2502
2503 if (!virtio_has_feature(dev->protocol_features,
2504 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
2505 return 0;
2506 }
2507
2508 return vhost_user_write(dev, &msg, &inflight->fd, 1);
2509 }
2510
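/* Bind a chardev to this VhostUserState; fails if one is already set */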
2511 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
2512 {
2513 if (user->chr) {
2514 error_setg(errp, "Cannot initialize vhost-user state");
2515 return false;
2516 }
2517 user->chr = chr;
2518 user->memory_slots = 0;
2519 return true;
2520 }
2521
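/* Unmap any remaining host-notifier regions and detach the chardev */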
2522 void vhost_user_cleanup(VhostUserState *user)
2523 {
2524 int i;
2525
2526 if (!user->chr) {
2527 return;
2528 }
2529 memory_region_transaction_begin();
2530 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2531 if (user->notifier[i].addr) {
2532 object_unparent(OBJECT(&user->notifier[i].mr));
2533 munmap(user->notifier[i].addr, qemu_real_host_page_size);
2534 user->notifier[i].addr = NULL;
2535 }
2536 }
2537 memory_region_transaction_commit();
2538 user->chr = NULL;
2539 }
2540
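/* vhost-user implementation of VhostOps, used for VHOST_BACKEND_TYPE_USER backends */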
2541 const VhostOps user_ops = {
2542 .backend_type = VHOST_BACKEND_TYPE_USER,
2543 .vhost_backend_init = vhost_user_backend_init,
2544 .vhost_backend_cleanup = vhost_user_backend_cleanup,
2545 .vhost_backend_memslots_limit = vhost_user_memslots_limit,
2546 .vhost_set_log_base = vhost_user_set_log_base,
2547 .vhost_set_mem_table = vhost_user_set_mem_table,
2548 .vhost_set_vring_addr = vhost_user_set_vring_addr,
2549 .vhost_set_vring_endian = vhost_user_set_vring_endian,
2550 .vhost_set_vring_num = vhost_user_set_vring_num,
2551 .vhost_set_vring_base = vhost_user_set_vring_base,
2552 .vhost_get_vring_base = vhost_user_get_vring_base,
2553 .vhost_set_vring_kick = vhost_user_set_vring_kick,
2554 .vhost_set_vring_call = vhost_user_set_vring_call,
2555 .vhost_set_features = vhost_user_set_features,
2556 .vhost_get_features = vhost_user_get_features,
2557 .vhost_set_owner = vhost_user_set_owner,
2558 .vhost_reset_device = vhost_user_reset_device,
2559 .vhost_get_vq_index = vhost_user_get_vq_index,
2560 .vhost_set_vring_enable = vhost_user_set_vring_enable,
2561 .vhost_requires_shm_log = vhost_user_requires_shm_log,
2562 .vhost_migration_done = vhost_user_migration_done,
2563 .vhost_backend_can_merge = vhost_user_can_merge,
2564 .vhost_net_set_mtu = vhost_user_net_set_mtu,
2565 .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
2566 .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
2567 .vhost_get_config = vhost_user_get_config,
2568 .vhost_set_config = vhost_user_set_config,
2569 .vhost_crypto_create_session = vhost_user_crypto_create_session,
2570 .vhost_crypto_close_session = vhost_user_crypto_close_session,
2571 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
2572 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
2573 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
2574 };