hw/ide/core: Trivial typo fix
[qemu.git] / contrib / libvhost-user / libvhost-user.h
1 /*
2 * Vhost User library
3 *
4 * Copyright (c) 2016 Red Hat, Inc.
5 *
6 * Authors:
7 * Victor Kaplansky <victork@redhat.com>
8 * Marc-André Lureau <mlureau@redhat.com>
9 *
10 * This work is licensed under the terms of the GNU GPL, version 2 or
11 * later. See the COPYING file in the top-level directory.
12 */
13
14 #ifndef LIBVHOST_USER_H
15 #define LIBVHOST_USER_H
16
17 #include <stdint.h>
18 #include <stdbool.h>
19 #include <stddef.h>
20 #include <sys/poll.h>
21 #include <linux/vhost.h>
22 #include <pthread.h>
23 #include "standard-headers/linux/virtio_ring.h"
24
/* Constants taken over from qemu/hw/virtio/vhost-user.c */
#define VHOST_USER_F_PROTOCOL_FEATURES  30
#define VHOST_LOG_PAGE                  4096

#define VIRTQUEUE_MAX_SIZE              1024

/* Length of the regions[] array in VhostUserMemory and of VhostUserMsg.fds[] */
#define VHOST_MEMORY_BASELINE_NREGIONS  8

/*
 * A reasonable upper limit on the number of RAM slots; chosen so that any
 * architecture can support it.
 */
#define VHOST_USER_MAX_RAM_SLOTS        32
38
/* Config type selector; the numeric values are assigned explicitly. */
typedef enum VhostSetConfigType {
    VHOST_SET_CONFIG_TYPE_MASTER    = 0,
    VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
} VhostSetConfigType;
43
/* Largest virtio device config space, in bytes (sizes VhostUserConfig.region) */
#define VHOST_USER_MAX_CONFIG_SIZE 256
48
/*
 * Protocol feature bit numbers.
 *
 * Each enumerator is a bit position (see VHOST_USER_PROTOCOL_FEATURE_MASK
 * below).  Position 13 is not assigned in this list.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ                   = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD            = 1,
    VHOST_USER_PROTOCOL_F_RARP                 = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK            = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU              = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ            = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN         = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION       = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT            = 8,
    VHOST_USER_PROTOCOL_F_CONFIG               = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD        = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER        = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD       = 12,
    VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14,
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS  = 15,

    /* One past the highest bit position listed above. */
    VHOST_USER_PROTOCOL_F_MAX
};

/* All bits below VHOST_USER_PROTOCOL_F_MAX set. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
70
/*
 * Request codes carried in VhostUserMsg.request.
 *
 * Every code is assigned explicitly; 34 is not assigned in this list.
 * VHOST_USER_MAX is one past the last listed code.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE                  = 0,
    VHOST_USER_GET_FEATURES          = 1,
    VHOST_USER_SET_FEATURES          = 2,
    VHOST_USER_SET_OWNER             = 3,
    VHOST_USER_RESET_OWNER           = 4,
    VHOST_USER_SET_MEM_TABLE         = 5,
    VHOST_USER_SET_LOG_BASE          = 6,
    VHOST_USER_SET_LOG_FD            = 7,
    VHOST_USER_SET_VRING_NUM         = 8,
    VHOST_USER_SET_VRING_ADDR        = 9,
    VHOST_USER_SET_VRING_BASE        = 10,
    VHOST_USER_GET_VRING_BASE        = 11,
    VHOST_USER_SET_VRING_KICK        = 12,
    VHOST_USER_SET_VRING_CALL        = 13,
    VHOST_USER_SET_VRING_ERR         = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM         = 17,
    VHOST_USER_SET_VRING_ENABLE      = 18,
    VHOST_USER_SEND_RARP             = 19,
    VHOST_USER_NET_SET_MTU           = 20,
    VHOST_USER_SET_SLAVE_REQ_FD      = 21,
    VHOST_USER_IOTLB_MSG             = 22,
    VHOST_USER_SET_VRING_ENDIAN      = 23,
    VHOST_USER_GET_CONFIG            = 24,
    VHOST_USER_SET_CONFIG            = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION  = 27,
    VHOST_USER_POSTCOPY_ADVISE       = 28,
    VHOST_USER_POSTCOPY_LISTEN       = 29,
    VHOST_USER_POSTCOPY_END          = 30,
    VHOST_USER_GET_INFLIGHT_FD       = 31,
    VHOST_USER_SET_INFLIGHT_FD       = 32,
    VHOST_USER_GPU_SET_SOCKET        = 33,
    VHOST_USER_VRING_KICK            = 35,
    VHOST_USER_GET_MAX_MEM_SLOTS     = 36,
    VHOST_USER_ADD_MEM_REG           = 37,
    VHOST_USER_REM_MEM_REG           = 38,
    VHOST_USER_MAX
} VhostUserRequest;
112
/*
 * Slave request codes, numbered explicitly from 0.
 * VHOST_USER_SLAVE_MAX is one past the last listed code.
 */
typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE                    = 0,
    VHOST_USER_SLAVE_IOTLB_MSG               = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG       = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_VRING_CALL              = 4,
    VHOST_USER_SLAVE_VRING_ERR               = 5,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
122
/* One memory region description: four consecutive 64-bit fields. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;
129
130 typedef struct VhostUserMemory {
131 uint32_t nregions;
132 uint32_t padding;
133 VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS];
134 } VhostUserMemory;
135
136 typedef struct VhostUserMemRegMsg {
137 uint32_t padding;
138 VhostUserMemoryRegion region;
139 } VhostUserMemRegMsg;
140
/* Log payload: size and offset of a mapping, both 64-bit. */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;
145
146 typedef struct VhostUserConfig {
147 uint32_t offset;
148 uint32_t size;
149 uint32_t flags;
150 uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
151 } VhostUserConfig;
152
/*
 * Combined size of the fixed VhostUserConfig fields (offset, size, flags)
 * that precede the region[] bytes.
 *
 * The member sizes are taken through a null VhostUserConfig pointer.  The
 * operand of sizeof is not evaluated, so nothing is dereferenced, and no
 * dummy object has to be defined in every translation unit that includes
 * this header.
 */
#define VHOST_USER_CONFIG_HDR_SIZE \
    (sizeof(((VhostUserConfig *)0)->offset) \
     + sizeof(((VhostUserConfig *)0)->size) \
     + sizeof(((VhostUserConfig *)0)->flags))
157
/* Vring area payload: three consecutive 64-bit fields. */
typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;
163
/*
 * Inflight payload: mapping size and offset (64-bit each), followed by the
 * number of queues and the queue size (16-bit each).
 */
typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
170
/*
 * Packing attribute applied to VhostUserMsg.  Windows builds for x86 or
 * x86-64 additionally request gcc_struct; every other target uses plain
 * packed.
 */
#if !defined(_WIN32) || !(defined(__x86_64__) || defined(__i386__))
# define VU_PACKED __attribute__((packed))
#else
# define VU_PACKED __attribute__((gcc_struct, packed))
#endif
176
177 typedef struct VhostUserMsg {
178 int request;
179
180 #define VHOST_USER_VERSION_MASK (0x3)
181 #define VHOST_USER_REPLY_MASK (0x1 << 2)
182 #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3)
183 uint32_t flags;
184 uint32_t size; /* the following payload size */
185
186 union {
187 #define VHOST_USER_VRING_IDX_MASK (0xff)
188 #define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
189 uint64_t u64;
190 struct vhost_vring_state state;
191 struct vhost_vring_addr addr;
192 VhostUserMemory memory;
193 VhostUserMemRegMsg memreg;
194 VhostUserLog log;
195 VhostUserConfig config;
196 VhostUserVringArea area;
197 VhostUserInflight inflight;
198 } payload;
199
200 int fds[VHOST_MEMORY_BASELINE_NREGIONS];
201 int fd_num;
202 uint8_t *data;
203 } VU_PACKED VhostUserMsg;
204
/* Description of one memory region as tracked by a VuDev. */
typedef struct VuDevRegion {
    uint64_t gpa;          /* guest physical address */
    uint64_t size;         /* size of the memory region */
    uint64_t qva;          /* QEMU virtual address (userspace) */
    uint64_t mmap_offset;  /* starting offset in our mmaped space */
    uint64_t mmap_addr;    /* start address of mmaped space */
} VuDevRegion;
217
218 typedef struct VuDev VuDev;
219
220 typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
221 typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
222 typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
223 int *do_reply);
224 typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
225 typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
226 typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
227 typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
228 uint32_t offset, uint32_t size,
229 uint32_t flags);
230
231 typedef struct VuDevIface {
232 /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
233 vu_get_features_cb get_features;
234 /* enable vhost implementation features */
235 vu_set_features_cb set_features;
236 /* get the protocol feature bitmask from the underlying vhost
237 * implementation */
238 vu_get_features_cb get_protocol_features;
239 /* enable protocol features in the underlying vhost implementation. */
240 vu_set_features_cb set_protocol_features;
241 /* process_msg is called for each vhost-user message received */
242 /* skip libvhost-user processing if return value != 0 */
243 vu_process_msg_cb process_msg;
244 /* tells when queues can be processed */
245 vu_queue_set_started_cb queue_set_started;
246 /*
247 * If the queue is processed in order, in which case it will be
248 * resumed to vring.used->idx. This can help to support resuming
249 * on unmanaged exit/crash.
250 */
251 vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
252 /* get the config space of the device */
253 vu_get_config_cb get_config;
254 /* set the config space of the device */
255 vu_set_config_cb set_config;
256 } VuDevIface;
257
258 typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
259
/*
 * A ring as seen by the device side: its size, pointers to the descriptor,
 * avail and used areas, plus a log guest address and flags.
 */
typedef struct VuRing {
    unsigned int num;
    struct vring_desc *desc;
    struct vring_avail *avail;
    struct vring_used *used;
    uint64_t log_guest_addr;
    uint32_t flags;
} VuRing;
268
/* Per-descriptor inflight tracking state. */
typedef struct VuDescStateSplit {
    /*
     * Indicates whether this descriptor is inflight or not.
     * Only available for head-descriptor.
     */
    uint8_t inflight;

    /* Padding */
    uint8_t padding[5];

    /*
     * Links the last batch of used descriptors into a list.
     * Only available when batching is used for submitting.
     */
    uint16_t next;

    /*
     * Preserves the order in which available descriptors were fetched.
     * Only available for head-descriptor.
     */
    uint64_t counter;
} VuDescStateSplit;
285
286 typedef struct VuVirtqInflight {
287 /* The feature flags of this region. Now it's initialized to 0. */
288 uint64_t features;
289
290 /* The version of this region. It's 1 currently.
291 * Zero value indicates a vm reset happened. */
292 uint16_t version;
293
294 /* The size of VuDescStateSplit array. It's equal to the virtqueue
295 * size. Slave could get it from queue size field of VhostUserInflight. */
296 uint16_t desc_num;
297
298 /* The head of list that track the last batch of used descriptors. */
299 uint16_t last_batch_head;
300
301 /* Storing the idx value of used ring */
302 uint16_t used_idx;
303
304 /* Used to track the state of each descriptor in descriptor table */
305 VuDescStateSplit desc[];
306 } VuVirtqInflight;
307
/* Pairing of a 16-bit index with a 64-bit counter (VuVirtq.resubmit_list). */
typedef struct VuVirtqInflightDesc {
    uint16_t index;
    uint64_t counter;
} VuVirtqInflightDesc;
312
313 typedef struct VuVirtq {
314 VuRing vring;
315
316 VuVirtqInflight *inflight;
317
318 VuVirtqInflightDesc *resubmit_list;
319
320 uint16_t resubmit_num;
321
322 uint64_t counter;
323
324 /* Next head to pop */
325 uint16_t last_avail_idx;
326
327 /* Last avail_idx read from VQ. */
328 uint16_t shadow_avail_idx;
329
330 uint16_t used_idx;
331
332 /* Last used index value we have signalled on */
333 uint16_t signalled_used;
334
335 /* Last used index value we have signalled on */
336 bool signalled_used_valid;
337
338 /* Notification enabled? */
339 bool notification;
340
341 int inuse;
342
343 vu_queue_handler_cb handler;
344
345 int call_fd;
346 int kick_fd;
347 int err_fd;
348 unsigned int enable;
349 bool started;
350
351 /* Guest addresses of our ring */
352 struct vhost_vring_addr vra;
353 } VuVirtq;
354
/* Watch conditions; each one aliases the poll() event flag of the same kind. */
enum VuWatchCondtion {
    VU_WATCH_IN  = POLLIN,
    VU_WATCH_OUT = POLLOUT,
    VU_WATCH_PRI = POLLPRI,
    VU_WATCH_ERR = POLLERR,
    VU_WATCH_HUP = POLLHUP,
};
362
363 typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
364 typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
365 typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
366 vu_watch_cb cb, void *data);
367 typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
368
/* Inflight bookkeeping held by a VuDev: a descriptor, an address and a size. */
typedef struct VuDevInflightInfo {
    int      fd;
    void    *addr;
    uint64_t size;
} VuDevInflightInfo;
374
375 struct VuDev {
376 int sock;
377 uint32_t nregions;
378 VuDevRegion regions[VHOST_USER_MAX_RAM_SLOTS];
379 VuVirtq *vq;
380 VuDevInflightInfo inflight_info;
381 int log_call_fd;
382 /* Must be held while using slave_fd */
383 pthread_mutex_t slave_mutex;
384 int slave_fd;
385 uint64_t log_size;
386 uint8_t *log_table;
387 uint64_t features;
388 uint64_t protocol_features;
389 bool broken;
390 uint16_t max_queues;
391
392 /* @set_watch: add or update the given fd to the watch set,
393 * call cb when condition is met */
394 vu_set_watch_cb set_watch;
395
396 /* @remove_watch: remove the given fd from the watch set */
397 vu_remove_watch_cb remove_watch;
398
399 /* @panic: encountered an unrecoverable error, you may try to
400 * re-initialize */
401 vu_panic_cb panic;
402 const VuDevIface *iface;
403
404 /* Postcopy data */
405 int postcopy_ufd;
406 bool postcopy_listening;
407 };
408
/*
 * Element handed out by vu_queue_pop(): an index, the out/in counts, and
 * the in/out iovec pointers.
 */
typedef struct VuVirtqElement {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    struct iovec *in_sg;
    struct iovec *out_sg;
} VuVirtqElement;
416
417 /**
418 * vu_init:
419 * @dev: a VuDev context
420 * @max_queues: maximum number of virtqueues
421 * @socket: the socket connected to vhost-user master
422 * @panic: a panic callback
423 * @set_watch: a set_watch callback
424 * @remove_watch: a remove_watch callback
425 * @iface: a VuDevIface structure with vhost-user device callbacks
426 *
427 * Intializes a VuDev vhost-user context.
428 *
429 * Returns: true on success, false on failure.
430 **/
431 bool vu_init(VuDev *dev,
432 uint16_t max_queues,
433 int socket,
434 vu_panic_cb panic,
435 vu_set_watch_cb set_watch,
436 vu_remove_watch_cb remove_watch,
437 const VuDevIface *iface);
438
439
440 /**
441 * vu_deinit:
442 * @dev: a VuDev context
443 *
444 * Cleans up the VuDev context
445 */
446 void vu_deinit(VuDev *dev);
447
448 /**
449 * vu_dispatch:
450 * @dev: a VuDev context
451 *
452 * Process one vhost-user message.
453 *
454 * Returns: TRUE on success, FALSE on failure.
455 */
456 bool vu_dispatch(VuDev *dev);
457
458 /**
459 * vu_gpa_to_va:
460 * @dev: a VuDev context
461 * @plen: guest memory size
462 * @guest_addr: guest address
463 *
464 * Translate a guest address to a pointer. Returns NULL on failure.
465 */
466 void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr);
467
468 /**
469 * vu_get_queue:
470 * @dev: a VuDev context
471 * @qidx: queue index
472 *
473 * Returns the queue number @qidx.
474 */
475 VuVirtq *vu_get_queue(VuDev *dev, int qidx);
476
477 /**
478 * vu_set_queue_handler:
479 * @dev: a VuDev context
480 * @vq: a VuVirtq queue
481 * @handler: the queue handler callback
482 *
483 * Set the queue handler. This function may be called several times
484 * for the same queue. If called with NULL @handler, the handler is
485 * removed.
486 */
487 void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
488 vu_queue_handler_cb handler);
489
490 /**
491 * vu_set_queue_host_notifier:
492 * @dev: a VuDev context
493 * @vq: a VuVirtq queue
494 * @fd: a file descriptor
495 * @size: host page size
496 * @offset: notifier offset in @fd file
497 *
498 * Set queue's host notifier. This function may be called several
499 * times for the same queue. If called with -1 @fd, the notifier
500 * is removed.
501 */
502 bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd,
503 int size, int offset);
504
505 /**
506 * vu_queue_set_notification:
507 * @dev: a VuDev context
508 * @vq: a VuVirtq queue
509 * @enable: state
510 *
511 * Set whether the queue notifies (via event index or interrupt)
512 */
513 void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
514
515 /**
516 * vu_queue_enabled:
517 * @dev: a VuDev context
518 * @vq: a VuVirtq queue
519 *
520 * Returns: whether the queue is enabled.
521 */
522 bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
523
524 /**
525 * vu_queue_started:
526 * @dev: a VuDev context
527 * @vq: a VuVirtq queue
528 *
529 * Returns: whether the queue is started.
530 */
531 bool vu_queue_started(const VuDev *dev, const VuVirtq *vq);
532
533 /**
534 * vu_queue_empty:
535 * @dev: a VuDev context
536 * @vq: a VuVirtq queue
537 *
538 * Returns: true if the queue is empty or not ready.
539 */
540 bool vu_queue_empty(VuDev *dev, VuVirtq *vq);
541
542 /**
543 * vu_queue_notify:
544 * @dev: a VuDev context
545 * @vq: a VuVirtq queue
546 *
547 * Request to notify the queue via callfd (skipped if unnecessary)
548 */
549 void vu_queue_notify(VuDev *dev, VuVirtq *vq);
550
551 /**
552 * vu_queue_notify_sync:
553 * @dev: a VuDev context
554 * @vq: a VuVirtq queue
555 *
556 * Request to notify the queue via callfd (skipped if unnecessary)
557 * or sync message if possible.
558 */
559 void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq);
560
561 /**
562 * vu_queue_pop:
563 * @dev: a VuDev context
564 * @vq: a VuVirtq queue
565 * @sz: the size of struct to return (must be >= VuVirtqElement)
566 *
567 * Returns: a VuVirtqElement filled from the queue or NULL. The
568 * returned element must be free()-d by the caller.
569 */
570 void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
571
572
573 /**
574 * vu_queue_unpop:
575 * @dev: a VuDev context
576 * @vq: a VuVirtq queue
577 * @elem: The #VuVirtqElement
578 * @len: number of bytes written
579 *
580 * Pretend the most recent element wasn't popped from the virtqueue. The next
581 * call to vu_queue_pop() will refetch the element.
582 */
583 void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem,
584 size_t len);
585
586 /**
587 * vu_queue_rewind:
588 * @dev: a VuDev context
589 * @vq: a VuVirtq queue
590 * @num: number of elements to push back
591 *
592 * Pretend that elements weren't popped from the virtqueue. The next
593 * virtqueue_pop() will refetch the oldest element.
594 *
595 * Returns: true on success, false if @num is greater than the number of in use
596 * elements.
597 */
598 bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
599
600 /**
601 * vu_queue_fill:
602 * @dev: a VuDev context
603 * @vq: a VuVirtq queue
604 * @elem: a VuVirtqElement
605 * @len: length in bytes to write
606 * @idx: optional offset for the used ring index (0 in general)
607 *
608 * Fill the used ring with @elem element.
609 */
610 void vu_queue_fill(VuDev *dev, VuVirtq *vq,
611 const VuVirtqElement *elem,
612 unsigned int len, unsigned int idx);
613
614 /**
615 * vu_queue_push:
616 * @dev: a VuDev context
617 * @vq: a VuVirtq queue
618 * @elem: a VuVirtqElement
619 * @len: length in bytes to write
620 *
621 * Helper that combines vu_queue_fill() with a vu_queue_flush().
622 */
623 void vu_queue_push(VuDev *dev, VuVirtq *vq,
624 const VuVirtqElement *elem, unsigned int len);
625
626 /**
627 * vu_queue_flush:
628 * @dev: a VuDev context
629 * @vq: a VuVirtq queue
630 * @num: number of elements to flush
631 *
632 * Mark the last number of elements as done (used.idx is updated by
633 * num elements).
634 */
635 void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
636
637 /**
638 * vu_queue_get_avail_bytes:
639 * @dev: a VuDev context
640 * @vq: a VuVirtq queue
641 * @in_bytes: in bytes
642 * @out_bytes: out bytes
643 * @max_in_bytes: stop counting after max_in_bytes
644 * @max_out_bytes: stop counting after max_out_bytes
645 *
646 * Count the number of available bytes, up to max_in_bytes/max_out_bytes.
647 */
648 void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
649 unsigned int *out_bytes,
650 unsigned max_in_bytes, unsigned max_out_bytes);
651
652 /**
653 * vu_queue_avail_bytes:
654 * @dev: a VuDev context
655 * @vq: a VuVirtq queue
656 * @in_bytes: expected in bytes
657 * @out_bytes: expected out bytes
658 *
659 * Returns: true if in_bytes <= in_total && out_bytes <= out_total
660 */
661 bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
662 unsigned int out_bytes);
663
664 #endif /* LIBVHOST_USER_H */