virtio: disable virtqueue notifications during polling
[qemu.git] / hw/virtio/virtio.c
1 /*
2 * Virtio Support
3 *
4 * Copyright IBM, Corp. 2007
5 *
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
11 *
12 */
13
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qemu-common.h"
17 #include "cpu.h"
18 #include "trace.h"
19 #include "exec/address-spaces.h"
20 #include "qemu/error-report.h"
21 #include "hw/virtio/virtio.h"
22 #include "qemu/atomic.h"
23 #include "hw/virtio/virtio-bus.h"
24 #include "migration/migration.h"
25 #include "hw/virtio/virtio-access.h"
26
27 /*
28 * The alignment to use between consumer and producer parts of vring.
29 * x86 pagesize again. This is the default, used by transports like PCI
30 * which don't provide a means for the guest to tell the host the alignment.
31 */
32 #define VIRTIO_PCI_VRING_ALIGN 4096
33
34 typedef struct VRingDesc
35 {
36 uint64_t addr;
37 uint32_t len;
38 uint16_t flags;
39 uint16_t next;
40 } VRingDesc;
41
42 typedef struct VRingAvail
43 {
44 uint16_t flags;
45 uint16_t idx;
46 uint16_t ring[0];
47 } VRingAvail;
48
49 typedef struct VRingUsedElem
50 {
51 uint32_t id;
52 uint32_t len;
53 } VRingUsedElem;
54
55 typedef struct VRingUsed
56 {
57 uint16_t flags;
58 uint16_t idx;
59 VRingUsedElem ring[0];
60 } VRingUsed;
61
62 typedef struct VRing
63 {
64 unsigned int num;
65 unsigned int num_default;
66 unsigned int align;
67 hwaddr desc;
68 hwaddr avail;
69 hwaddr used;
70 } VRing;
71
72 struct VirtQueue
73 {
74 VRing vring;
75
76 /* Next head to pop */
77 uint16_t last_avail_idx;
78
79 /* Last avail_idx read from VQ. */
80 uint16_t shadow_avail_idx;
81
82 uint16_t used_idx;
83
84 /* Last used index value we have signalled on */
85 uint16_t signalled_used;
86
87 /* Whether signalled_used is valid */
88 bool signalled_used_valid;
89
90 /* Nested host->guest notification disabled counter */
91 unsigned int notification_disabled;
92
93 uint16_t queue_index;
94
95 int inuse;
96
97 uint16_t vector;
98 VirtIOHandleOutput handle_output;
99 VirtIOHandleOutput handle_aio_output;
100 VirtIODevice *vdev;
101 EventNotifier guest_notifier;
102 EventNotifier host_notifier;
103 QLIST_ENTRY(VirtQueue) node;
104 };
105
106 /* virt queue functions */
107 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
108 {
109 VRing *vring = &vdev->vq[n].vring;
110
111 if (!vring->desc) {
112 /* not yet setup -> nothing to do */
113 return;
114 }
115 vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
116 vring->used = vring_align(vring->avail +
117 offsetof(VRingAvail, ring[vring->num]),
118 vring->align);
119 }
120
121 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
122 hwaddr desc_pa, int i)
123 {
124 address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
125 MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
126 virtio_tswap64s(vdev, &desc->addr);
127 virtio_tswap32s(vdev, &desc->len);
128 virtio_tswap16s(vdev, &desc->flags);
129 virtio_tswap16s(vdev, &desc->next);
130 }
131
132 static inline uint16_t vring_avail_flags(VirtQueue *vq)
133 {
134 hwaddr pa;
135 pa = vq->vring.avail + offsetof(VRingAvail, flags);
136 return virtio_lduw_phys(vq->vdev, pa);
137 }
138
139 static inline uint16_t vring_avail_idx(VirtQueue *vq)
140 {
141 hwaddr pa;
142 pa = vq->vring.avail + offsetof(VRingAvail, idx);
143 vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
144 return vq->shadow_avail_idx;
145 }
146
147 static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
148 {
149 hwaddr pa;
150 pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
151 return virtio_lduw_phys(vq->vdev, pa);
152 }
153
154 static inline uint16_t vring_get_used_event(VirtQueue *vq)
155 {
156 return vring_avail_ring(vq, vq->vring.num);
157 }
158
159 static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
160 int i)
161 {
162 hwaddr pa;
163 virtio_tswap32s(vq->vdev, &uelem->id);
164 virtio_tswap32s(vq->vdev, &uelem->len);
165 pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
166 address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
167 (void *)uelem, sizeof(VRingUsedElem));
168 }
169
170 static uint16_t vring_used_idx(VirtQueue *vq)
171 {
172 hwaddr pa;
173 pa = vq->vring.used + offsetof(VRingUsed, idx);
174 return virtio_lduw_phys(vq->vdev, pa);
175 }
176
177 static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
178 {
179 hwaddr pa;
180 pa = vq->vring.used + offsetof(VRingUsed, idx);
181 virtio_stw_phys(vq->vdev, pa, val);
182 vq->used_idx = val;
183 }
184
185 static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
186 {
187 VirtIODevice *vdev = vq->vdev;
188 hwaddr pa;
189 pa = vq->vring.used + offsetof(VRingUsed, flags);
190 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
191 }
192
193 static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
194 {
195 VirtIODevice *vdev = vq->vdev;
196 hwaddr pa;
197 pa = vq->vring.used + offsetof(VRingUsed, flags);
198 virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
199 }
200
201 static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
202 {
203 hwaddr pa;
204 if (vq->notification_disabled) {
205 return;
206 }
207 pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
208 virtio_stw_phys(vq->vdev, pa, val);
209 }
210
211 void virtio_queue_set_notification(VirtQueue *vq, int enable)
212 {
213 if (enable) {
214 assert(vq->notification_disabled > 0);
215 vq->notification_disabled--;
216 } else {
217 vq->notification_disabled++;
218 }
219
220 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
221 vring_set_avail_event(vq, vring_avail_idx(vq));
222 } else if (enable) {
223 vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
224 } else {
225 vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
226 }
227 if (enable) {
228 /* Expose avail event/used flags before caller checks the avail idx. */
229 smp_mb();
230 }
231 }
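
/*
 * notification_disabled is a nesting counter, so disable/enable calls must be
 * paired.  A brief sketch, assuming a caller that polls the ring (the values
 * in the comments track the counter):
 *
 *     virtio_queue_set_notification(vq, 0);   // 0 -> 1, guest kicks suppressed
 *     virtio_queue_set_notification(vq, 0);   // 1 -> 2, e.g. a nested poll
 *     virtio_queue_set_notification(vq, 1);   // 2 -> 1, still suppressed
 *     virtio_queue_set_notification(vq, 1);   // 1 -> 0, notifications back on
 *
 * Enabling without a matching disable trips the assertion above.
 */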
232
233 int virtio_queue_ready(VirtQueue *vq)
234 {
235 return vq->vring.avail != 0;
236 }
237
238 /* Fetch avail_idx from VQ memory only when we really need to know if
239 * guest has added some buffers. */
240 int virtio_queue_empty(VirtQueue *vq)
241 {
242 if (vq->shadow_avail_idx != vq->last_avail_idx) {
243 return 0;
244 }
245
246 return vring_avail_idx(vq) == vq->last_avail_idx;
247 }
248
249 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
250 unsigned int len)
251 {
252 unsigned int offset;
253 int i;
254
255 offset = 0;
256 for (i = 0; i < elem->in_num; i++) {
257 size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
258
259 cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
260 elem->in_sg[i].iov_len,
261 1, size);
262
263 offset += size;
264 }
265
266 for (i = 0; i < elem->out_num; i++)
267 cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
268 elem->out_sg[i].iov_len,
269 0, elem->out_sg[i].iov_len);
270 }
271
272 /* virtqueue_detach_element:
273 * @vq: The #VirtQueue
274 * @elem: The #VirtQueueElement
275 * @len: number of bytes written
276 *
277 * Detach the element from the virtqueue. This function is suitable for device
278 * reset or other situations where a #VirtQueueElement is simply freed and will
279 * not be pushed or discarded.
280 */
281 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
282 unsigned int len)
283 {
284 vq->inuse--;
285 virtqueue_unmap_sg(vq, elem, len);
286 }
287
288 /* virtqueue_unpop:
289 * @vq: The #VirtQueue
290 * @elem: The #VirtQueueElement
291 * @len: number of bytes written
292 *
293 * Pretend the most recent element wasn't popped from the virtqueue. The next
294 * call to virtqueue_pop() will refetch the element.
295 */
296 void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
297 unsigned int len)
298 {
299 vq->last_avail_idx--;
300 virtqueue_detach_element(vq, elem, len);
301 }
302
303 /* virtqueue_rewind:
304 * @vq: The #VirtQueue
305 * @num: Number of elements to push back
306 *
307 * Pretend that elements weren't popped from the virtqueue. The next
308 * virtqueue_pop() will refetch the oldest element.
309 *
310 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
311 *
312 * Returns: true on success, false if @num is greater than the number of in use
313 * elements.
314 */
315 bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
316 {
317 if (num > vq->inuse) {
318 return false;
319 }
320 vq->last_avail_idx -= num;
321 vq->inuse -= num;
322 return true;
323 }
324
325 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
326 unsigned int len, unsigned int idx)
327 {
328 VRingUsedElem uelem;
329
330 trace_virtqueue_fill(vq, elem, len, idx);
331
332 virtqueue_unmap_sg(vq, elem, len);
333
334 if (unlikely(vq->vdev->broken)) {
335 return;
336 }
337
338 idx = (idx + vq->used_idx) % vq->vring.num;
339
340 uelem.id = elem->index;
341 uelem.len = len;
342 vring_used_write(vq, &uelem, idx);
343 }
344
345 void virtqueue_flush(VirtQueue *vq, unsigned int count)
346 {
347 uint16_t old, new;
348
349 if (unlikely(vq->vdev->broken)) {
350 vq->inuse -= count;
351 return;
352 }
353
354 /* Make sure buffer is written before we update index. */
355 smp_wmb();
356 trace_virtqueue_flush(vq, count);
357 old = vq->used_idx;
358 new = old + count;
359 vring_used_idx_set(vq, new);
360 vq->inuse -= count;
361 if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
362 vq->signalled_used_valid = false;
363 }
364
365 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
366 unsigned int len)
367 {
368 virtqueue_fill(vq, elem, len, 0);
369 virtqueue_flush(vq, 1);
370 }
371
372 static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
373 {
374 uint16_t num_heads = vring_avail_idx(vq) - idx;
375
376 /* Check it isn't doing very strange things with descriptor numbers. */
377 if (num_heads > vq->vring.num) {
378 virtio_error(vq->vdev, "Guest moved used index from %u to %u",
379 idx, vq->shadow_avail_idx);
380 return -EINVAL;
381 }
382 /* On success, callers read a descriptor at vq->last_avail_idx.
383 * Make sure descriptor read does not bypass avail index read. */
384 if (num_heads) {
385 smp_rmb();
386 }
387
388 return num_heads;
389 }
390
391 static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
392 unsigned int *head)
393 {
394 /* Grab the next descriptor number they're advertising, and increment
395 * the index we've seen. */
396 *head = vring_avail_ring(vq, idx % vq->vring.num);
397
398 /* If their number is silly, that's a fatal mistake. */
399 if (*head >= vq->vring.num) {
400 virtio_error(vq->vdev, "Guest says index %u is available", *head);
401 return false;
402 }
403
404 return true;
405 }
406
407 enum {
408 VIRTQUEUE_READ_DESC_ERROR = -1,
409 VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
410 VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
411 };
412
413 static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
414 hwaddr desc_pa, unsigned int max,
415 unsigned int *next)
416 {
417 /* If this descriptor says it doesn't chain, we're done. */
418 if (!(desc->flags & VRING_DESC_F_NEXT)) {
419 return VIRTQUEUE_READ_DESC_DONE;
420 }
421
422 /* Check they're not leading us off end of descriptors. */
423 *next = desc->next;
424 /* Make sure compiler knows to grab that: we don't want it changing! */
425 smp_wmb();
426
427 if (*next >= max) {
428 virtio_error(vdev, "Desc next is %u", *next);
429 return VIRTQUEUE_READ_DESC_ERROR;
430 }
431
432 vring_desc_read(vdev, desc, desc_pa, *next);
433 return VIRTQUEUE_READ_DESC_MORE;
434 }
435
436 void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
437 unsigned int *out_bytes,
438 unsigned max_in_bytes, unsigned max_out_bytes)
439 {
440 unsigned int idx;
441 unsigned int total_bufs, in_total, out_total;
442 int rc;
443
444 idx = vq->last_avail_idx;
445
446 total_bufs = in_total = out_total = 0;
447 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
448 VirtIODevice *vdev = vq->vdev;
449 unsigned int max, num_bufs, indirect = 0;
450 VRingDesc desc;
451 hwaddr desc_pa;
452 unsigned int i;
453
454 max = vq->vring.num;
455 num_bufs = total_bufs;
456
457 if (!virtqueue_get_head(vq, idx++, &i)) {
458 goto err;
459 }
460
461 desc_pa = vq->vring.desc;
462 vring_desc_read(vdev, &desc, desc_pa, i);
463
464 if (desc.flags & VRING_DESC_F_INDIRECT) {
465 if (desc.len % sizeof(VRingDesc)) {
466 virtio_error(vdev, "Invalid size for indirect buffer table");
467 goto err;
468 }
469
470 /* If we've got too many, that implies a descriptor loop. */
471 if (num_bufs >= max) {
472 virtio_error(vdev, "Looped descriptor");
473 goto err;
474 }
475
476 /* loop over the indirect descriptor table */
477 indirect = 1;
478 max = desc.len / sizeof(VRingDesc);
479 desc_pa = desc.addr;
480 num_bufs = i = 0;
481 vring_desc_read(vdev, &desc, desc_pa, i);
482 }
483
484 do {
485 /* If we've got too many, that implies a descriptor loop. */
486 if (++num_bufs > max) {
487 virtio_error(vdev, "Looped descriptor");
488 goto err;
489 }
490
491 if (desc.flags & VRING_DESC_F_WRITE) {
492 in_total += desc.len;
493 } else {
494 out_total += desc.len;
495 }
496 if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
497 goto done;
498 }
499
500 rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
501 } while (rc == VIRTQUEUE_READ_DESC_MORE);
502
503 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
504 goto err;
505 }
506
507 if (!indirect)
508 total_bufs = num_bufs;
509 else
510 total_bufs++;
511 }
512
513 if (rc < 0) {
514 goto err;
515 }
516
517 done:
518 if (in_bytes) {
519 *in_bytes = in_total;
520 }
521 if (out_bytes) {
522 *out_bytes = out_total;
523 }
524 return;
525
526 err:
527 in_total = out_total = 0;
528 goto done;
529 }
530
531 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
532 unsigned int out_bytes)
533 {
534 unsigned int in_total, out_total;
535
536 virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
537 return in_bytes <= in_total && out_bytes <= out_total;
538 }
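
/*
 * A minimal usage sketch, assuming a hypothetical receive path: before popping
 * an element, check that the guest has posted at least `needed` bytes of
 * device-writable (in) space; no driver-readable (out) space is required here.
 * my_rx_has_space() is illustrative and not part of this file.
 */
static bool my_rx_has_space(VirtQueue *vq, unsigned int needed)
{
    /* in_bytes = needed, out_bytes = 0 */
    return virtqueue_avail_bytes(vq, needed, 0);
}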
539
540 static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
541 hwaddr *addr, struct iovec *iov,
542 unsigned int max_num_sg, bool is_write,
543 hwaddr pa, size_t sz)
544 {
545 bool ok = false;
546 unsigned num_sg = *p_num_sg;
547 assert(num_sg <= max_num_sg);
548
549 if (!sz) {
550 virtio_error(vdev, "virtio: zero sized buffers are not allowed");
551 goto out;
552 }
553
554 while (sz) {
555 hwaddr len = sz;
556
557 if (num_sg == max_num_sg) {
558 virtio_error(vdev, "virtio: too many write descriptors in "
559 "indirect table");
560 goto out;
561 }
562
563 iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
564 if (!iov[num_sg].iov_base) {
565 virtio_error(vdev, "virtio: bogus descriptor or out of resources");
566 goto out;
567 }
568
569 iov[num_sg].iov_len = len;
570 addr[num_sg] = pa;
571
572 sz -= len;
573 pa += len;
574 num_sg++;
575 }
576 ok = true;
577
578 out:
579 *p_num_sg = num_sg;
580 return ok;
581 }
582
583 /* Only used by error code paths before we have a VirtQueueElement (therefore
584 * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
585 * yet.
586 */
587 static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
588 struct iovec *iov)
589 {
590 unsigned int i;
591
592 for (i = 0; i < out_num + in_num; i++) {
593 int is_write = i >= out_num;
594
595 cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
596 iov++;
597 }
598 }
599
600 static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
601 unsigned int *num_sg, unsigned int max_size,
602 int is_write)
603 {
604 unsigned int i;
605 hwaddr len;
606
607 /* Note: this function MUST validate input, some callers
608 * are passing in num_sg values received over the network.
609 */
610 /* TODO: teach all callers that this can fail, and return failure instead
611 * of asserting here.
612 * When we do, we might be able to re-enable NDEBUG below.
613 */
614 #ifdef NDEBUG
615 #error building with NDEBUG is not supported
616 #endif
617 assert(*num_sg <= max_size);
618
619 for (i = 0; i < *num_sg; i++) {
620 len = sg[i].iov_len;
621 sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
622 if (!sg[i].iov_base) {
623 error_report("virtio: error trying to map MMIO memory");
624 exit(1);
625 }
626 if (len != sg[i].iov_len) {
627 error_report("virtio: unexpected memory split");
628 exit(1);
629 }
630 }
631 }
632
633 void virtqueue_map(VirtQueueElement *elem)
634 {
635 virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
636 VIRTQUEUE_MAX_SIZE, 1);
637 virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
638 VIRTQUEUE_MAX_SIZE, 0);
639 }
640
641 static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
642 {
643 VirtQueueElement *elem;
644 size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
645 size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
646 size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
647 size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
648 size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
649 size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
650
651 assert(sz >= sizeof(VirtQueueElement));
652 elem = g_malloc(out_sg_end);
653 elem->out_num = out_num;
654 elem->in_num = in_num;
655 elem->in_addr = (void *)elem + in_addr_ofs;
656 elem->out_addr = (void *)elem + out_addr_ofs;
657 elem->in_sg = (void *)elem + in_sg_ofs;
658 elem->out_sg = (void *)elem + out_sg_ofs;
659 return elem;
660 }
661
662 void *virtqueue_pop(VirtQueue *vq, size_t sz)
663 {
664 unsigned int i, head, max;
665 hwaddr desc_pa = vq->vring.desc;
666 VirtIODevice *vdev = vq->vdev;
667 VirtQueueElement *elem;
668 unsigned out_num, in_num;
669 hwaddr addr[VIRTQUEUE_MAX_SIZE];
670 struct iovec iov[VIRTQUEUE_MAX_SIZE];
671 VRingDesc desc;
672 int rc;
673
674 if (unlikely(vdev->broken)) {
675 return NULL;
676 }
677 if (virtio_queue_empty(vq)) {
678 return NULL;
679 }
680 /* Needed after virtio_queue_empty(), see comment in
681 * virtqueue_num_heads(). */
682 smp_rmb();
683
684 /* When we start there are no input or output descriptors. */
685 out_num = in_num = 0;
686
687 max = vq->vring.num;
688
689 if (vq->inuse >= vq->vring.num) {
690 virtio_error(vdev, "Virtqueue size exceeded");
691 return NULL;
692 }
693
694 if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
695 return NULL;
696 }
697
698 if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
699 vring_set_avail_event(vq, vq->last_avail_idx);
700 }
701
702 i = head;
703 vring_desc_read(vdev, &desc, desc_pa, i);
704 if (desc.flags & VRING_DESC_F_INDIRECT) {
705 if (desc.len % sizeof(VRingDesc)) {
706 virtio_error(vdev, "Invalid size for indirect buffer table");
707 return NULL;
708 }
709
710 /* loop over the indirect descriptor table */
711 max = desc.len / sizeof(VRingDesc);
712 desc_pa = desc.addr;
713 i = 0;
714 vring_desc_read(vdev, &desc, desc_pa, i);
715 }
716
717 /* Collect all the descriptors */
718 do {
719 bool map_ok;
720
721 if (desc.flags & VRING_DESC_F_WRITE) {
722 map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
723 iov + out_num,
724 VIRTQUEUE_MAX_SIZE - out_num, true,
725 desc.addr, desc.len);
726 } else {
727 if (in_num) {
728 virtio_error(vdev, "Incorrect order for descriptors");
729 goto err_undo_map;
730 }
731 map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
732 VIRTQUEUE_MAX_SIZE, false,
733 desc.addr, desc.len);
734 }
735 if (!map_ok) {
736 goto err_undo_map;
737 }
738
739 /* If we've got too many, that implies a descriptor loop. */
740 if ((in_num + out_num) > max) {
741 virtio_error(vdev, "Looped descriptor");
742 goto err_undo_map;
743 }
744
745 rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
746 } while (rc == VIRTQUEUE_READ_DESC_MORE);
747
748 if (rc == VIRTQUEUE_READ_DESC_ERROR) {
749 goto err_undo_map;
750 }
751
752 /* Now copy what we have collected and mapped */
753 elem = virtqueue_alloc_element(sz, out_num, in_num);
754 elem->index = head;
755 for (i = 0; i < out_num; i++) {
756 elem->out_addr[i] = addr[i];
757 elem->out_sg[i] = iov[i];
758 }
759 for (i = 0; i < in_num; i++) {
760 elem->in_addr[i] = addr[out_num + i];
761 elem->in_sg[i] = iov[out_num + i];
762 }
763
764 vq->inuse++;
765
766 trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
767 return elem;
768
769 err_undo_map:
770 virtqueue_undo_map_desc(out_num, in_num, iov);
771 return NULL;
772 }
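
/*
 * A minimal sketch, assuming a hypothetical device, of the usual
 * request-processing loop built on virtqueue_pop()/virtqueue_push():
 * my_process_request() stands in for real device logic and returns the
 * number of bytes it wrote into the in (device-writable) buffers.
 */
static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;
    size_t written;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;                          /* ring empty or device broken */
        }
        /* out_sg[] carries driver->device data, in_sg[] device->driver space */
        written = my_process_request(elem->out_sg, elem->out_num,
                                     elem->in_sg, elem->in_num);
        virtqueue_push(vq, elem, written);  /* unmaps and marks the element used */
        g_free(elem);
    }
    virtio_notify(vdev, vq);
}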
773
774 /* Reading and writing a structure directly to QEMUFile is *awful*, but
775 * it is what QEMU has always done by mistake. We can change it sooner
776 * or later by bumping the version number of the affected vm states.
777 * In the meanwhile, since the in-memory layout of VirtQueueElement
778 * has changed, we need to marshal to and from the layout that was
779 * used before the change.
780 */
781 typedef struct VirtQueueElementOld {
782 unsigned int index;
783 unsigned int out_num;
784 unsigned int in_num;
785 hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
786 hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
787 struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
788 struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
789 } VirtQueueElementOld;
790
791 void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
792 {
793 VirtQueueElement *elem;
794 VirtQueueElementOld data;
795 int i;
796
797 qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
798
799 elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
800 elem->index = data.index;
801
802 for (i = 0; i < elem->in_num; i++) {
803 elem->in_addr[i] = data.in_addr[i];
804 }
805
806 for (i = 0; i < elem->out_num; i++) {
807 elem->out_addr[i] = data.out_addr[i];
808 }
809
810 for (i = 0; i < elem->in_num; i++) {
811 /* Base is overwritten by virtqueue_map. */
812 elem->in_sg[i].iov_base = 0;
813 elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
814 }
815
816 for (i = 0; i < elem->out_num; i++) {
817 /* Base is overwritten by virtqueue_map. */
818 elem->out_sg[i].iov_base = 0;
819 elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
820 }
821
822 virtqueue_map(elem);
823 return elem;
824 }
825
826 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
827 {
828 VirtQueueElementOld data;
829 int i;
830
831 memset(&data, 0, sizeof(data));
832 data.index = elem->index;
833 data.in_num = elem->in_num;
834 data.out_num = elem->out_num;
835
836 for (i = 0; i < elem->in_num; i++) {
837 data.in_addr[i] = elem->in_addr[i];
838 }
839
840 for (i = 0; i < elem->out_num; i++) {
841 data.out_addr[i] = elem->out_addr[i];
842 }
843
844 for (i = 0; i < elem->in_num; i++) {
845 /* Base is overwritten by virtqueue_map when loading. Do not
846 * save it, as it would leak the QEMU address space layout. */
847 data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
848 }
849
850 for (i = 0; i < elem->out_num; i++) {
851 /* Do not save iov_base as above. */
852 data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
853 }
854 qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
855 }
856
857 /* virtio device */
858 static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
859 {
860 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
861 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
862
863 if (unlikely(vdev->broken)) {
864 return;
865 }
866
867 if (k->notify) {
868 k->notify(qbus->parent, vector);
869 }
870 }
871
872 void virtio_update_irq(VirtIODevice *vdev)
873 {
874 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
875 }
876
877 static int virtio_validate_features(VirtIODevice *vdev)
878 {
879 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
880
881 if (k->validate_features) {
882 return k->validate_features(vdev);
883 } else {
884 return 0;
885 }
886 }
887
888 int virtio_set_status(VirtIODevice *vdev, uint8_t val)
889 {
890 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
891 trace_virtio_set_status(vdev, val);
892
893 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
894 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
895 val & VIRTIO_CONFIG_S_FEATURES_OK) {
896 int ret = virtio_validate_features(vdev);
897
898 if (ret) {
899 return ret;
900 }
901 }
902 }
903 if (k->set_status) {
904 k->set_status(vdev, val);
905 }
906 vdev->status = val;
907 return 0;
908 }
909
910 bool target_words_bigendian(void);
911 static enum virtio_device_endian virtio_default_endian(void)
912 {
913 if (target_words_bigendian()) {
914 return VIRTIO_DEVICE_ENDIAN_BIG;
915 } else {
916 return VIRTIO_DEVICE_ENDIAN_LITTLE;
917 }
918 }
919
920 static enum virtio_device_endian virtio_current_cpu_endian(void)
921 {
922 CPUClass *cc = CPU_GET_CLASS(current_cpu);
923
924 if (cc->virtio_is_big_endian(current_cpu)) {
925 return VIRTIO_DEVICE_ENDIAN_BIG;
926 } else {
927 return VIRTIO_DEVICE_ENDIAN_LITTLE;
928 }
929 }
930
931 void virtio_reset(void *opaque)
932 {
933 VirtIODevice *vdev = opaque;
934 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
935 int i;
936
937 virtio_set_status(vdev, 0);
938 if (current_cpu) {
939 /* Guest initiated reset */
940 vdev->device_endian = virtio_current_cpu_endian();
941 } else {
942 /* System reset */
943 vdev->device_endian = virtio_default_endian();
944 }
945
946 if (k->reset) {
947 k->reset(vdev);
948 }
949
950 vdev->broken = false;
951 vdev->guest_features = 0;
952 vdev->queue_sel = 0;
953 vdev->status = 0;
954 atomic_set(&vdev->isr, 0);
955 vdev->config_vector = VIRTIO_NO_VECTOR;
956 virtio_notify_vector(vdev, vdev->config_vector);
957
958 for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
959 vdev->vq[i].vring.desc = 0;
960 vdev->vq[i].vring.avail = 0;
961 vdev->vq[i].vring.used = 0;
962 vdev->vq[i].last_avail_idx = 0;
963 vdev->vq[i].shadow_avail_idx = 0;
964 vdev->vq[i].used_idx = 0;
965 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
966 vdev->vq[i].signalled_used = 0;
967 vdev->vq[i].signalled_used_valid = false;
968 vdev->vq[i].notification_disabled = 0;
969 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
970 vdev->vq[i].inuse = 0;
971 }
972 }
973
974 uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
975 {
976 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
977 uint8_t val;
978
979 if (addr + sizeof(val) > vdev->config_len) {
980 return (uint32_t)-1;
981 }
982
983 k->get_config(vdev, vdev->config);
984
985 val = ldub_p(vdev->config + addr);
986 return val;
987 }
988
989 uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
990 {
991 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
992 uint16_t val;
993
994 if (addr + sizeof(val) > vdev->config_len) {
995 return (uint32_t)-1;
996 }
997
998 k->get_config(vdev, vdev->config);
999
1000 val = lduw_p(vdev->config + addr);
1001 return val;
1002 }
1003
1004 uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
1005 {
1006 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1007 uint32_t val;
1008
1009 if (addr + sizeof(val) > vdev->config_len) {
1010 return (uint32_t)-1;
1011 }
1012
1013 k->get_config(vdev, vdev->config);
1014
1015 val = ldl_p(vdev->config + addr);
1016 return val;
1017 }
1018
1019 void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1020 {
1021 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1022 uint8_t val = data;
1023
1024 if (addr + sizeof(val) > vdev->config_len) {
1025 return;
1026 }
1027
1028 stb_p(vdev->config + addr, val);
1029
1030 if (k->set_config) {
1031 k->set_config(vdev, vdev->config);
1032 }
1033 }
1034
1035 void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1036 {
1037 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1038 uint16_t val = data;
1039
1040 if (addr + sizeof(val) > vdev->config_len) {
1041 return;
1042 }
1043
1044 stw_p(vdev->config + addr, val);
1045
1046 if (k->set_config) {
1047 k->set_config(vdev, vdev->config);
1048 }
1049 }
1050
1051 void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
1052 {
1053 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1054 uint32_t val = data;
1055
1056 if (addr + sizeof(val) > vdev->config_len) {
1057 return;
1058 }
1059
1060 stl_p(vdev->config + addr, val);
1061
1062 if (k->set_config) {
1063 k->set_config(vdev, vdev->config);
1064 }
1065 }
1066
1067 uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
1068 {
1069 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1070 uint8_t val;
1071
1072 if (addr + sizeof(val) > vdev->config_len) {
1073 return (uint32_t)-1;
1074 }
1075
1076 k->get_config(vdev, vdev->config);
1077
1078 val = ldub_p(vdev->config + addr);
1079 return val;
1080 }
1081
1082 uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
1083 {
1084 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1085 uint16_t val;
1086
1087 if (addr + sizeof(val) > vdev->config_len) {
1088 return (uint32_t)-1;
1089 }
1090
1091 k->get_config(vdev, vdev->config);
1092
1093 val = lduw_le_p(vdev->config + addr);
1094 return val;
1095 }
1096
1097 uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
1098 {
1099 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1100 uint32_t val;
1101
1102 if (addr + sizeof(val) > vdev->config_len) {
1103 return (uint32_t)-1;
1104 }
1105
1106 k->get_config(vdev, vdev->config);
1107
1108 val = ldl_le_p(vdev->config + addr);
1109 return val;
1110 }
1111
1112 void virtio_config_modern_writeb(VirtIODevice *vdev,
1113 uint32_t addr, uint32_t data)
1114 {
1115 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1116 uint8_t val = data;
1117
1118 if (addr + sizeof(val) > vdev->config_len) {
1119 return;
1120 }
1121
1122 stb_p(vdev->config + addr, val);
1123
1124 if (k->set_config) {
1125 k->set_config(vdev, vdev->config);
1126 }
1127 }
1128
1129 void virtio_config_modern_writew(VirtIODevice *vdev,
1130 uint32_t addr, uint32_t data)
1131 {
1132 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1133 uint16_t val = data;
1134
1135 if (addr + sizeof(val) > vdev->config_len) {
1136 return;
1137 }
1138
1139 stw_le_p(vdev->config + addr, val);
1140
1141 if (k->set_config) {
1142 k->set_config(vdev, vdev->config);
1143 }
1144 }
1145
1146 void virtio_config_modern_writel(VirtIODevice *vdev,
1147 uint32_t addr, uint32_t data)
1148 {
1149 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1150 uint32_t val = data;
1151
1152 if (addr + sizeof(val) > vdev->config_len) {
1153 return;
1154 }
1155
1156 stl_le_p(vdev->config + addr, val);
1157
1158 if (k->set_config) {
1159 k->set_config(vdev, vdev->config);
1160 }
1161 }
1162
1163 void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
1164 {
1165 vdev->vq[n].vring.desc = addr;
1166 virtio_queue_update_rings(vdev, n);
1167 }
1168
1169 hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
1170 {
1171 return vdev->vq[n].vring.desc;
1172 }
1173
1174 void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
1175 hwaddr avail, hwaddr used)
1176 {
1177 vdev->vq[n].vring.desc = desc;
1178 vdev->vq[n].vring.avail = avail;
1179 vdev->vq[n].vring.used = used;
1180 }
1181
1182 void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
1183 {
1184 /* Don't allow guest to flip queue between existent and
1185 * nonexistent states, or to set it to an invalid size.
1186 */
1187 if (!!num != !!vdev->vq[n].vring.num ||
1188 num > VIRTQUEUE_MAX_SIZE ||
1189 num < 0) {
1190 return;
1191 }
1192 vdev->vq[n].vring.num = num;
1193 }
1194
1195 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
1196 {
1197 return QLIST_FIRST(&vdev->vector_queues[vector]);
1198 }
1199
1200 VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
1201 {
1202 return QLIST_NEXT(vq, node);
1203 }
1204
1205 int virtio_queue_get_num(VirtIODevice *vdev, int n)
1206 {
1207 return vdev->vq[n].vring.num;
1208 }
1209
1210 int virtio_get_num_queues(VirtIODevice *vdev)
1211 {
1212 int i;
1213
1214 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1215 if (!virtio_queue_get_num(vdev, i)) {
1216 break;
1217 }
1218 }
1219
1220 return i;
1221 }
1222
1223 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
1224 {
1225 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1226 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1227
1228 /* virtio-1 compliant devices cannot change the alignment */
1229 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1230 error_report("tried to modify queue alignment for virtio-1 device");
1231 return;
1232 }
1233 /* Check that the transport told us it was going to do this
1234 * (so a buggy transport will immediately assert rather than
1235 * silently failing to migrate this state)
1236 */
1237 assert(k->has_variable_vring_alignment);
1238
1239 vdev->vq[n].vring.align = align;
1240 virtio_queue_update_rings(vdev, n);
1241 }
1242
1243 static void virtio_queue_notify_aio_vq(VirtQueue *vq)
1244 {
1245 if (vq->vring.desc && vq->handle_aio_output) {
1246 VirtIODevice *vdev = vq->vdev;
1247
1248 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1249 vq->handle_aio_output(vdev, vq);
1250 }
1251 }
1252
1253 static void virtio_queue_notify_vq(VirtQueue *vq)
1254 {
1255 if (vq->vring.desc && vq->handle_output) {
1256 VirtIODevice *vdev = vq->vdev;
1257
1258 if (unlikely(vdev->broken)) {
1259 return;
1260 }
1261
1262 trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
1263 vq->handle_output(vdev, vq);
1264 }
1265 }
1266
1267 void virtio_queue_notify(VirtIODevice *vdev, int n)
1268 {
1269 virtio_queue_notify_vq(&vdev->vq[n]);
1270 }
1271
1272 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
1273 {
1274 return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
1275 VIRTIO_NO_VECTOR;
1276 }
1277
1278 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
1279 {
1280 VirtQueue *vq = &vdev->vq[n];
1281
1282 if (n < VIRTIO_QUEUE_MAX) {
1283 if (vdev->vector_queues &&
1284 vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
1285 QLIST_REMOVE(vq, node);
1286 }
1287 vdev->vq[n].vector = vector;
1288 if (vdev->vector_queues &&
1289 vector != VIRTIO_NO_VECTOR) {
1290 QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
1291 }
1292 }
1293 }
1294
1295 VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
1296 VirtIOHandleOutput handle_output)
1297 {
1298 int i;
1299
1300 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1301 if (vdev->vq[i].vring.num == 0)
1302 break;
1303 }
1304
1305 if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
1306 abort();
1307
1308 vdev->vq[i].vring.num = queue_size;
1309 vdev->vq[i].vring.num_default = queue_size;
1310 vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
1311 vdev->vq[i].handle_output = handle_output;
1312 vdev->vq[i].handle_aio_output = NULL;
1313
1314 return &vdev->vq[i];
1315 }
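
/*
 * A minimal sketch, assuming a hypothetical device, of how queues are created
 * from a realize path: virtio_init() (defined later in this file) sets up the
 * common state, then virtio_add_queue() attaches one handler per queue.
 * MyDeviceState, MY_DEVICE_ID, my_handle_rx/my_handle_tx and the queue size of
 * 256 are illustrative only.
 */
static void my_device_init_queues(MyDeviceState *s)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(s);

    virtio_init(vdev, "my-device", MY_DEVICE_ID, sizeof(s->config));
    s->rx_vq = virtio_add_queue(vdev, 256, my_handle_rx);
    s->tx_vq = virtio_add_queue(vdev, 256, my_handle_tx);
}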
1316
1317 void virtio_del_queue(VirtIODevice *vdev, int n)
1318 {
1319 if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
1320 abort();
1321 }
1322
1323 vdev->vq[n].vring.num = 0;
1324 vdev->vq[n].vring.num_default = 0;
1325 }
1326
1327 static void virtio_set_isr(VirtIODevice *vdev, int value)
1328 {
1329 uint8_t old = atomic_read(&vdev->isr);
1330
1331 /* Do not write ISR if it does not change, so that its cacheline remains
1332 * shared in the common case where the guest does not read it.
1333 */
1334 if ((old & value) != value) {
1335 atomic_or(&vdev->isr, value);
1336 }
1337 }
1338
1339 bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
1340 {
1341 uint16_t old, new;
1342 bool v;
1343 /* We need to expose used array entries before checking used event. */
1344 smp_mb();
345 /* Always notify when queue is empty (if VIRTIO_F_NOTIFY_ON_EMPTY was negotiated) */
1346 if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1347 !vq->inuse && virtio_queue_empty(vq)) {
1348 return true;
1349 }
1350
1351 if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
1352 return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
1353 }
1354
1355 v = vq->signalled_used_valid;
1356 vq->signalled_used_valid = true;
1357 old = vq->signalled_used;
1358 new = vq->signalled_used = vq->used_idx;
1359 return !v || vring_need_event(vring_get_used_event(vq), new, old);
1360 }
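
/*
 * With VIRTIO_RING_F_EVENT_IDX the guest publishes a "used event" index and
 * vring_need_event() (from the standard virtio ring header) reduces to
 *
 *     (uint16_t)(new - event_idx - 1) < (uint16_t)(new - old)
 *
 * i.e. notify only if event_idx falls in [old, new).  A worked example with
 * old = 5 and new = 8: used_event = 6 gives 1 < 3, so interrupt; used_event = 9
 * gives 0xfffe < 3, which is false, so the interrupt is suppressed.
 */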
1361
1362 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
1363 {
1364 if (!virtio_should_notify(vdev, vq)) {
1365 return;
1366 }
1367
1368 trace_virtio_notify_irqfd(vdev, vq);
1369
1370 /*
1371 * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
1372 * windows drivers included in virtio-win 1.8.0 (circa 2015) are
1373 * incorrectly polling this bit during crashdump and hibernation
1374 * in MSI mode, causing a hang if this bit is never updated.
1375 * Recent releases of Windows do not really shut down, but rather
1376 * log out and hibernate to make the next startup faster. Hence,
377 * this manifested as a more serious hang during shutdown.
378 *
379 * The next driver release, from 2016, fixed this problem, so working around
380 * it is not a must, but it's easy to do, so let's do it here.
1381 *
1382 * Note: it's safe to update ISR from any thread as it was switched
1383 * to an atomic operation.
1384 */
1385 virtio_set_isr(vq->vdev, 0x1);
1386 event_notifier_set(&vq->guest_notifier);
1387 }
1388
1389 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
1390 {
1391 if (!virtio_should_notify(vdev, vq)) {
1392 return;
1393 }
1394
1395 trace_virtio_notify(vdev, vq);
1396 virtio_set_isr(vq->vdev, 0x1);
1397 virtio_notify_vector(vdev, vq->vector);
1398 }
1399
1400 void virtio_notify_config(VirtIODevice *vdev)
1401 {
1402 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1403 return;
1404
1405 virtio_set_isr(vdev, 0x3);
1406 vdev->generation++;
1407 virtio_notify_vector(vdev, vdev->config_vector);
1408 }
1409
1410 static bool virtio_device_endian_needed(void *opaque)
1411 {
1412 VirtIODevice *vdev = opaque;
1413
1414 assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1415 if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1416 return vdev->device_endian != virtio_default_endian();
1417 }
1418 /* Devices conforming to VIRTIO 1.0 or later are always LE. */
1419 return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1420 }
1421
1422 static bool virtio_64bit_features_needed(void *opaque)
1423 {
1424 VirtIODevice *vdev = opaque;
1425
1426 return (vdev->host_features >> 32) != 0;
1427 }
1428
1429 static bool virtio_virtqueue_needed(void *opaque)
1430 {
1431 VirtIODevice *vdev = opaque;
1432
1433 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
1434 }
1435
1436 static bool virtio_ringsize_needed(void *opaque)
1437 {
1438 VirtIODevice *vdev = opaque;
1439 int i;
1440
1441 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1442 if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
1443 return true;
1444 }
1445 }
1446 return false;
1447 }
1448
1449 static bool virtio_extra_state_needed(void *opaque)
1450 {
1451 VirtIODevice *vdev = opaque;
1452 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1453 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1454
1455 return k->has_extra_state &&
1456 k->has_extra_state(qbus->parent);
1457 }
1458
1459 static bool virtio_broken_needed(void *opaque)
1460 {
1461 VirtIODevice *vdev = opaque;
1462
1463 return vdev->broken;
1464 }
1465
1466 static const VMStateDescription vmstate_virtqueue = {
1467 .name = "virtqueue_state",
1468 .version_id = 1,
1469 .minimum_version_id = 1,
1470 .fields = (VMStateField[]) {
1471 VMSTATE_UINT64(vring.avail, struct VirtQueue),
1472 VMSTATE_UINT64(vring.used, struct VirtQueue),
1473 VMSTATE_END_OF_LIST()
1474 }
1475 };
1476
1477 static const VMStateDescription vmstate_virtio_virtqueues = {
1478 .name = "virtio/virtqueues",
1479 .version_id = 1,
1480 .minimum_version_id = 1,
1481 .needed = &virtio_virtqueue_needed,
1482 .fields = (VMStateField[]) {
1483 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1484 VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
1485 VMSTATE_END_OF_LIST()
1486 }
1487 };
1488
1489 static const VMStateDescription vmstate_ringsize = {
1490 .name = "ringsize_state",
1491 .version_id = 1,
1492 .minimum_version_id = 1,
1493 .fields = (VMStateField[]) {
1494 VMSTATE_UINT32(vring.num_default, struct VirtQueue),
1495 VMSTATE_END_OF_LIST()
1496 }
1497 };
1498
1499 static const VMStateDescription vmstate_virtio_ringsize = {
1500 .name = "virtio/ringsize",
1501 .version_id = 1,
1502 .minimum_version_id = 1,
1503 .needed = &virtio_ringsize_needed,
1504 .fields = (VMStateField[]) {
1505 VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
1506 VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
1507 VMSTATE_END_OF_LIST()
1508 }
1509 };
1510
1511 static int get_extra_state(QEMUFile *f, void *pv, size_t size)
1512 {
1513 VirtIODevice *vdev = pv;
1514 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1515 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1516
1517 if (!k->load_extra_state) {
1518 return -1;
1519 } else {
1520 return k->load_extra_state(qbus->parent, f);
1521 }
1522 }
1523
1524 static void put_extra_state(QEMUFile *f, void *pv, size_t size)
1525 {
1526 VirtIODevice *vdev = pv;
1527 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1528 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1529
1530 k->save_extra_state(qbus->parent, f);
1531 }
1532
1533 static const VMStateInfo vmstate_info_extra_state = {
1534 .name = "virtqueue_extra_state",
1535 .get = get_extra_state,
1536 .put = put_extra_state,
1537 };
1538
1539 static const VMStateDescription vmstate_virtio_extra_state = {
1540 .name = "virtio/extra_state",
1541 .version_id = 1,
1542 .minimum_version_id = 1,
1543 .needed = &virtio_extra_state_needed,
1544 .fields = (VMStateField[]) {
1545 {
1546 .name = "extra_state",
1547 .version_id = 0,
1548 .field_exists = NULL,
1549 .size = 0,
1550 .info = &vmstate_info_extra_state,
1551 .flags = VMS_SINGLE,
1552 .offset = 0,
1553 },
1554 VMSTATE_END_OF_LIST()
1555 }
1556 };
1557
1558 static const VMStateDescription vmstate_virtio_device_endian = {
1559 .name = "virtio/device_endian",
1560 .version_id = 1,
1561 .minimum_version_id = 1,
1562 .needed = &virtio_device_endian_needed,
1563 .fields = (VMStateField[]) {
1564 VMSTATE_UINT8(device_endian, VirtIODevice),
1565 VMSTATE_END_OF_LIST()
1566 }
1567 };
1568
1569 static const VMStateDescription vmstate_virtio_64bit_features = {
1570 .name = "virtio/64bit_features",
1571 .version_id = 1,
1572 .minimum_version_id = 1,
1573 .needed = &virtio_64bit_features_needed,
1574 .fields = (VMStateField[]) {
1575 VMSTATE_UINT64(guest_features, VirtIODevice),
1576 VMSTATE_END_OF_LIST()
1577 }
1578 };
1579
1580 static const VMStateDescription vmstate_virtio_broken = {
1581 .name = "virtio/broken",
1582 .version_id = 1,
1583 .minimum_version_id = 1,
1584 .needed = &virtio_broken_needed,
1585 .fields = (VMStateField[]) {
1586 VMSTATE_BOOL(broken, VirtIODevice),
1587 VMSTATE_END_OF_LIST()
1588 }
1589 };
1590
1591 static const VMStateDescription vmstate_virtio = {
1592 .name = "virtio",
1593 .version_id = 1,
1594 .minimum_version_id = 1,
1595 .minimum_version_id_old = 1,
1596 .fields = (VMStateField[]) {
1597 VMSTATE_END_OF_LIST()
1598 },
1599 .subsections = (const VMStateDescription*[]) {
1600 &vmstate_virtio_device_endian,
1601 &vmstate_virtio_64bit_features,
1602 &vmstate_virtio_virtqueues,
1603 &vmstate_virtio_ringsize,
1604 &vmstate_virtio_broken,
1605 &vmstate_virtio_extra_state,
1606 NULL
1607 }
1608 };
1609
1610 void virtio_save(VirtIODevice *vdev, QEMUFile *f)
1611 {
1612 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1613 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1614 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1615 uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
1616 int i;
1617
1618 if (k->save_config) {
1619 k->save_config(qbus->parent, f);
1620 }
1621
1622 qemu_put_8s(f, &vdev->status);
1623 qemu_put_8s(f, &vdev->isr);
1624 qemu_put_be16s(f, &vdev->queue_sel);
1625 qemu_put_be32s(f, &guest_features_lo);
1626 qemu_put_be32(f, vdev->config_len);
1627 qemu_put_buffer(f, vdev->config, vdev->config_len);
1628
1629 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1630 if (vdev->vq[i].vring.num == 0)
1631 break;
1632 }
1633
1634 qemu_put_be32(f, i);
1635
1636 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1637 if (vdev->vq[i].vring.num == 0)
1638 break;
1639
1640 qemu_put_be32(f, vdev->vq[i].vring.num);
1641 if (k->has_variable_vring_alignment) {
1642 qemu_put_be32(f, vdev->vq[i].vring.align);
1643 }
1644 /* XXX virtio-1 devices */
1645 qemu_put_be64(f, vdev->vq[i].vring.desc);
1646 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
1647 if (k->save_queue) {
1648 k->save_queue(qbus->parent, i, f);
1649 }
1650 }
1651
1652 if (vdc->save != NULL) {
1653 vdc->save(vdev, f);
1654 }
1655
1656 if (vdc->vmsd) {
1657 vmstate_save_state(f, vdc->vmsd, vdev, NULL);
1658 }
1659
1660 /* Subsections */
1661 vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
1662 }
1663
1664 /* A wrapper for use as a VMState .put function */
1665 static void virtio_device_put(QEMUFile *f, void *opaque, size_t size)
1666 {
1667 virtio_save(VIRTIO_DEVICE(opaque), f);
1668 }
1669
1670 /* A wrapper for use as a VMState .get function */
1671 static int virtio_device_get(QEMUFile *f, void *opaque, size_t size)
1672 {
1673 VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
1674 DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
1675
1676 return virtio_load(vdev, f, dc->vmsd->version_id);
1677 }
1678
1679 const VMStateInfo virtio_vmstate_info = {
1680 .name = "virtio",
1681 .get = virtio_device_get,
1682 .put = virtio_device_put,
1683 };
1684
1685 static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
1686 {
1687 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
1688 bool bad = (val & ~(vdev->host_features)) != 0;
1689
1690 val &= vdev->host_features;
1691 if (k->set_features) {
1692 k->set_features(vdev, val);
1693 }
1694 vdev->guest_features = val;
1695 return bad ? -1 : 0;
1696 }
1697
1698 int virtio_set_features(VirtIODevice *vdev, uint64_t val)
1699 {
1700 /*
1701 * The driver must not attempt to set features after feature negotiation
1702 * has finished.
1703 */
1704 if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
1705 return -EINVAL;
1706 }
1707 return virtio_set_features_nocheck(vdev, val);
1708 }
1709
1710 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
1711 {
1712 int i, ret;
1713 int32_t config_len;
1714 uint32_t num;
1715 uint32_t features;
1716 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1717 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1718 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
1719
1720 /*
1721 * We poison the endianness to ensure it does not get used before
1722 * subsections have been loaded.
1723 */
1724 vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;
1725
1726 if (k->load_config) {
1727 ret = k->load_config(qbus->parent, f);
1728 if (ret)
1729 return ret;
1730 }
1731
1732 qemu_get_8s(f, &vdev->status);
1733 qemu_get_8s(f, &vdev->isr);
1734 qemu_get_be16s(f, &vdev->queue_sel);
1735 if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
1736 return -1;
1737 }
1738 qemu_get_be32s(f, &features);
1739
1740 /*
1741 * Temporarily set guest_features low bits - needed by
1742 * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
1743 * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
1744 *
1745 * Note: devices should always test host features in future - don't create
1746 * new dependencies like this.
1747 */
1748 vdev->guest_features = features;
1749
1750 config_len = qemu_get_be32(f);
1751
1752 /*
1753 * There are cases where the incoming config can be bigger or smaller
1754 * than what we have; so load what we have space for, and skip
1755 * any excess that's in the stream.
1756 */
1757 qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));
1758
1759 while (config_len > vdev->config_len) {
1760 qemu_get_byte(f);
1761 config_len--;
1762 }
1763
1764 num = qemu_get_be32(f);
1765
1766 if (num > VIRTIO_QUEUE_MAX) {
1767 error_report("Invalid number of virtqueues: 0x%x", num);
1768 return -1;
1769 }
1770
1771 for (i = 0; i < num; i++) {
1772 vdev->vq[i].vring.num = qemu_get_be32(f);
1773 if (k->has_variable_vring_alignment) {
1774 vdev->vq[i].vring.align = qemu_get_be32(f);
1775 }
1776 vdev->vq[i].vring.desc = qemu_get_be64(f);
1777 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
1778 vdev->vq[i].signalled_used_valid = false;
1779 vdev->vq[i].notification_disabled = 0;
1780
1781 if (vdev->vq[i].vring.desc) {
1782 /* XXX virtio-1 devices */
1783 virtio_queue_update_rings(vdev, i);
1784 } else if (vdev->vq[i].last_avail_idx) {
1785 error_report("VQ %d address 0x0 "
1786 "inconsistent with Host index 0x%x",
1787 i, vdev->vq[i].last_avail_idx);
1788 return -1;
1789 }
1790 if (k->load_queue) {
1791 ret = k->load_queue(qbus->parent, i, f);
1792 if (ret)
1793 return ret;
1794 }
1795 }
1796
1797 virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
1798
1799 if (vdc->load != NULL) {
1800 ret = vdc->load(vdev, f, version_id);
1801 if (ret) {
1802 return ret;
1803 }
1804 }
1805
1806 if (vdc->vmsd) {
1807 ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
1808 if (ret) {
1809 return ret;
1810 }
1811 }
1812
1813 /* Subsections */
1814 ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
1815 if (ret) {
1816 return ret;
1817 }
1818
1819 if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
1820 vdev->device_endian = virtio_default_endian();
1821 }
1822
1823 if (virtio_64bit_features_needed(vdev)) {
1824 /*
1825 * Subsection load filled vdev->guest_features. Run them
1826 * through virtio_set_features to sanity-check them against
1827 * host_features.
1828 */
1829 uint64_t features64 = vdev->guest_features;
1830 if (virtio_set_features_nocheck(vdev, features64) < 0) {
1831 error_report("Features 0x%" PRIx64 " unsupported. "
1832 "Allowed features: 0x%" PRIx64,
1833 features64, vdev->host_features);
1834 return -1;
1835 }
1836 } else {
1837 if (virtio_set_features_nocheck(vdev, features) < 0) {
1838 error_report("Features 0x%x unsupported. "
1839 "Allowed features: 0x%" PRIx64,
1840 features, vdev->host_features);
1841 return -1;
1842 }
1843 }
1844
1845 for (i = 0; i < num; i++) {
1846 if (vdev->vq[i].vring.desc) {
1847 uint16_t nheads;
1848 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
1849 /* Check it isn't doing strange things with descriptor numbers. */
1850 if (nheads > vdev->vq[i].vring.num) {
1851 error_report("VQ %d size 0x%x Guest index 0x%x "
1852 "inconsistent with Host index 0x%x: delta 0x%x",
1853 i, vdev->vq[i].vring.num,
1854 vring_avail_idx(&vdev->vq[i]),
1855 vdev->vq[i].last_avail_idx, nheads);
1856 return -1;
1857 }
1858 vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
1859 vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
1860
1861 /*
1862 * Some devices migrate VirtQueueElements that have been popped
1863 * from the avail ring but not yet returned to the used ring.
1864 */
1865 vdev->vq[i].inuse = vdev->vq[i].last_avail_idx -
1866 vdev->vq[i].used_idx;
1867 if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
1868 error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
1869 "used_idx 0x%x",
1870 i, vdev->vq[i].vring.num,
1871 vdev->vq[i].last_avail_idx,
1872 vdev->vq[i].used_idx);
1873 return -1;
1874 }
1875 }
1876 }
1877
1878 return 0;
1879 }
1880
1881 void virtio_cleanup(VirtIODevice *vdev)
1882 {
1883 qemu_del_vm_change_state_handler(vdev->vmstate);
1884 g_free(vdev->config);
1885 g_free(vdev->vq);
1886 g_free(vdev->vector_queues);
1887 }
1888
1889 static void virtio_vmstate_change(void *opaque, int running, RunState state)
1890 {
1891 VirtIODevice *vdev = opaque;
1892 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1893 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1894 bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
1895 vdev->vm_running = running;
1896
1897 if (backend_run) {
1898 virtio_set_status(vdev, vdev->status);
1899 }
1900
1901 if (k->vmstate_change) {
1902 k->vmstate_change(qbus->parent, backend_run);
1903 }
1904
1905 if (!backend_run) {
1906 virtio_set_status(vdev, vdev->status);
1907 }
1908 }
1909
1910 void virtio_instance_init_common(Object *proxy_obj, void *data,
1911 size_t vdev_size, const char *vdev_name)
1912 {
1913 DeviceState *vdev = data;
1914
1915 object_initialize(vdev, vdev_size, vdev_name);
1916 object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
1917 object_unref(OBJECT(vdev));
1918 qdev_alias_all_properties(vdev, proxy_obj);
1919 }
1920
1921 void virtio_init(VirtIODevice *vdev, const char *name,
1922 uint16_t device_id, size_t config_size)
1923 {
1924 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
1925 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
1926 int i;
1927 int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;
1928
1929 if (nvectors) {
1930 vdev->vector_queues =
1931 g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
1932 }
1933
1934 vdev->device_id = device_id;
1935 vdev->status = 0;
1936 atomic_set(&vdev->isr, 0);
1937 vdev->queue_sel = 0;
1938 vdev->config_vector = VIRTIO_NO_VECTOR;
1939 vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
1940 vdev->vm_running = runstate_is_running();
1941 vdev->broken = false;
1942 for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
1943 vdev->vq[i].vector = VIRTIO_NO_VECTOR;
1944 vdev->vq[i].vdev = vdev;
1945 vdev->vq[i].queue_index = i;
1946 }
1947
1948 vdev->name = name;
1949 vdev->config_len = config_size;
1950 if (vdev->config_len) {
1951 vdev->config = g_malloc0(config_size);
1952 } else {
1953 vdev->config = NULL;
1954 }
1955 vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
1956 vdev);
1957 vdev->device_endian = virtio_default_endian();
1958 vdev->use_guest_notifier_mask = true;
1959 }
1960
1961 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
1962 {
1963 return vdev->vq[n].vring.desc;
1964 }
1965
1966 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
1967 {
1968 return vdev->vq[n].vring.avail;
1969 }
1970
1971 hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
1972 {
1973 return vdev->vq[n].vring.used;
1974 }
1975
1976 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
1977 {
1978 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
1979 }
1980
1981 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
1982 {
1983 return offsetof(VRingAvail, ring) +
1984 sizeof(uint16_t) * vdev->vq[n].vring.num;
1985 }
1986
1987 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
1988 {
1989 return offsetof(VRingUsed, ring) +
1990 sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
1991 }
1992
1993 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
1994 {
1995 return vdev->vq[n].last_avail_idx;
1996 }
1997
1998 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
1999 {
2000 vdev->vq[n].last_avail_idx = idx;
2001 vdev->vq[n].shadow_avail_idx = idx;
2002 }
2003
2004 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
2005 {
2006 vdev->vq[n].signalled_used_valid = false;
2007 }
2008
2009 VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
2010 {
2011 return vdev->vq + n;
2012 }
2013
2014 uint16_t virtio_get_queue_index(VirtQueue *vq)
2015 {
2016 return vq->queue_index;
2017 }
2018
2019 static void virtio_queue_guest_notifier_read(EventNotifier *n)
2020 {
2021 VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
2022 if (event_notifier_test_and_clear(n)) {
2023 virtio_notify_vector(vq->vdev, vq->vector);
2024 }
2025 }
2026
2027 void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
2028 bool with_irqfd)
2029 {
2030 if (assign && !with_irqfd) {
2031 event_notifier_set_handler(&vq->guest_notifier, false,
2032 virtio_queue_guest_notifier_read);
2033 } else {
2034 event_notifier_set_handler(&vq->guest_notifier, false, NULL);
2035 }
2036 if (!assign) {
2037 /* Test and clear notifier before closing it,
2038 * in case poll callback didn't have time to run. */
2039 virtio_queue_guest_notifier_read(&vq->guest_notifier);
2040 }
2041 }
2042
2043 EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
2044 {
2045 return &vq->guest_notifier;
2046 }
2047
2048 static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
2049 {
2050 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2051 if (event_notifier_test_and_clear(n)) {
2052 virtio_queue_notify_aio_vq(vq);
2053 }
2054 }
2055
2056 static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
2057 {
2058 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2059
2060 virtio_queue_set_notification(vq, 0);
2061 }
2062
2063 static bool virtio_queue_host_notifier_aio_poll(void *opaque)
2064 {
2065 EventNotifier *n = opaque;
2066 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2067
2068 if (virtio_queue_empty(vq)) {
2069 return false;
2070 }
2071
2072 virtio_queue_notify_aio_vq(vq);
2073 return true;
2074 }
2075
2076 static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
2077 {
2078 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2079
2080 /* Caller polls once more after this to catch requests that race with us */
2081 virtio_queue_set_notification(vq, 1);
2082 }
2083
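/*
 * Attach or detach a queue's host notifier in an AioContext.  With a handler,
 * the notifier is registered with both a read handler and the polling
 * callbacks above; with NULL, the notifier is removed and any pending
 * notification is drained by running the read handler one last time.
 * An illustrative (not taken from this file) dataplane-style sequence:
 *
 *     virtio_queue_aio_set_host_notifier_handler(vq, iothread_ctx, handler);
 *     ...
 *     virtio_queue_aio_set_host_notifier_handler(vq, iothread_ctx, NULL);
 */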
2084 void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2085 VirtIOHandleOutput handle_output)
2086 {
2087 if (handle_output) {
2088 vq->handle_aio_output = handle_output;
2089 aio_set_event_notifier(ctx, &vq->host_notifier, true,
2090 virtio_queue_host_notifier_aio_read,
2091 virtio_queue_host_notifier_aio_poll);
2092 aio_set_event_notifier_poll(ctx, &vq->host_notifier,
2093 virtio_queue_host_notifier_aio_poll_begin,
2094 virtio_queue_host_notifier_aio_poll_end);
2095 } else {
2096 aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
2097         /* Test and clear notifier after disabling the event,
2098          * in case poll callback didn't have time to run. */

2099 virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2100 vq->handle_aio_output = NULL;
2101 }
2102 }
2103
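/*
 * Non-AioContext counterpart of the aio read handler above; this is what the
 * ioeventfd start path below installs.  A pending ioeventfd event becomes a
 * call to the queue's handle_output callback via virtio_queue_notify_vq().
 */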
2104 void virtio_queue_host_notifier_read(EventNotifier *n)
2105 {
2106 VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2107 if (event_notifier_test_and_clear(n)) {
2108 virtio_queue_notify_vq(vq);
2109 }
2110 }
2111
2112 EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
2113 {
2114 return &vq->host_notifier;
2115 }
2116
2117 void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
2118 {
2119 g_free(vdev->bus_name);
2120 vdev->bus_name = g_strdup(bus_name);
2121 }
2122
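/*
 * virtio_error() reports a fatal device error: it logs the message and marks
 * the device as broken.  For VIRTIO 1.0 (VERSION_1) devices it also sets
 * NEEDS_RESET in the status field and raises a configuration change
 * interrupt so the guest can notice and reset the device; legacy devices
 * have no such mechanism and simply stop being serviced.  A purely
 * illustrative call site (the message text is hypothetical) might look like:
 *
 *     virtio_error(vdev, "queue %u: descriptor loop detected", i);
 */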
2123 void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
2124 {
2125 va_list ap;
2126
2127 va_start(ap, fmt);
2128 error_vreport(fmt, ap);
2129 va_end(ap);
2130
2131 vdev->broken = true;
2132
2133 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
2134 virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
2135 virtio_notify_config(vdev);
2136 }
2137 }
2138
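/*
 * Common realize/unrealize for all virtio devices: realize runs the
 * subclass's own realize first and then plugs the device into the virtio
 * bus so the transport can finish its setup; unrealize does the same steps
 * in reverse order.
 */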
2139 static void virtio_device_realize(DeviceState *dev, Error **errp)
2140 {
2141 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2142 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2143 Error *err = NULL;
2144
2145     /* Devices should use either vmsd or the load/save methods, not both */
2146 assert(!vdc->vmsd || !vdc->load);
2147
2148 if (vdc->realize != NULL) {
2149 vdc->realize(dev, &err);
2150 if (err != NULL) {
2151 error_propagate(errp, err);
2152 return;
2153 }
2154 }
2155
2156 virtio_bus_device_plugged(vdev, &err);
2157 if (err != NULL) {
2158 error_propagate(errp, err);
2159 return;
2160 }
2161 }
2162
2163 static void virtio_device_unrealize(DeviceState *dev, Error **errp)
2164 {
2165 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2166 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
2167 Error *err = NULL;
2168
2169 virtio_bus_device_unplugged(vdev);
2170
2171 if (vdc->unrealize != NULL) {
2172 vdc->unrealize(dev, &err);
2173 if (err != NULL) {
2174 error_propagate(errp, err);
2175 return;
2176 }
2177 }
2178
2179 g_free(vdev->bus_name);
2180 vdev->bus_name = NULL;
2181 }
2182
2183 static Property virtio_properties[] = {
2184 DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
2185 DEFINE_PROP_END_OF_LIST(),
2186 };
2187
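/*
 * Default start_ioeventfd implementation: bind every non-empty queue's host
 * notifier to the transport's ioeventfd and install the read handler, then
 * kick each queue once so requests already sitting in the vring are picked
 * up.  If assigning a notifier fails part-way, the notifiers set up so far
 * are rolled back and the error is returned.
 */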
2188 static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
2189 {
2190 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2191 int n, r, err;
2192
2193 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2194 VirtQueue *vq = &vdev->vq[n];
2195 if (!virtio_queue_get_num(vdev, n)) {
2196 continue;
2197 }
2198 r = virtio_bus_set_host_notifier(qbus, n, true);
2199 if (r < 0) {
2200 err = r;
2201 goto assign_error;
2202 }
2203 event_notifier_set_handler(&vq->host_notifier, true,
2204 virtio_queue_host_notifier_read);
2205 }
2206
2207 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2208 /* Kick right away to begin processing requests already in vring */
2209 VirtQueue *vq = &vdev->vq[n];
2210 if (!vq->vring.num) {
2211 continue;
2212 }
2213 event_notifier_set(&vq->host_notifier);
2214 }
2215 return 0;
2216
2217 assign_error:
2218 while (--n >= 0) {
2219 VirtQueue *vq = &vdev->vq[n];
2220 if (!virtio_queue_get_num(vdev, n)) {
2221 continue;
2222 }
2223
2224 event_notifier_set_handler(&vq->host_notifier, true, NULL);
2225 r = virtio_bus_set_host_notifier(qbus, n, false);
2226 assert(r >= 0);
2227 }
2228 return err;
2229 }
2230
2231 int virtio_device_start_ioeventfd(VirtIODevice *vdev)
2232 {
2233 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2234 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2235
2236 return virtio_bus_start_ioeventfd(vbus);
2237 }
2238
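/*
 * Default stop_ioeventfd implementation: undo the start path by removing the
 * read handlers and unassigning the host notifiers of all configured queues.
 */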
2239 static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
2240 {
2241 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
2242 int n, r;
2243
2244 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
2245 VirtQueue *vq = &vdev->vq[n];
2246
2247 if (!virtio_queue_get_num(vdev, n)) {
2248 continue;
2249 }
2250 event_notifier_set_handler(&vq->host_notifier, true, NULL);
2251 r = virtio_bus_set_host_notifier(qbus, n, false);
2252 assert(r >= 0);
2253 }
2254 }
2255
2256 void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
2257 {
2258 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2259 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2260
2261 virtio_bus_stop_ioeventfd(vbus);
2262 }
2263
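/*
 * grab/release are thin wrappers around the bus-level helpers.  Grabbing is
 * presumably intended for backends that cannot work without ioeventfd (vhost,
 * for example) so it stays active independently of the normal start/stop
 * path; see virtio_bus_grab_ioeventfd() for the details.
 */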
2264 int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
2265 {
2266 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2267 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2268
2269 return virtio_bus_grab_ioeventfd(vbus);
2270 }
2271
2272 void virtio_device_release_ioeventfd(VirtIODevice *vdev)
2273 {
2274 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2275 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2276
2277 virtio_bus_release_ioeventfd(vbus);
2278 }
2279
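/*
 * Base class initialization: hook up realize/unrealize, attach the common
 * feature properties, and install the ioeventfd start/stop implementations
 * above as the class defaults.  VIRTIO_LEGACY_FEATURES is merged into the
 * class's legacy feature set for transitional devices.
 */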
2280 static void virtio_device_class_init(ObjectClass *klass, void *data)
2281 {
2282 /* Set the default value here. */
2283 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2284 DeviceClass *dc = DEVICE_CLASS(klass);
2285
2286 dc->realize = virtio_device_realize;
2287 dc->unrealize = virtio_device_unrealize;
2288 dc->bus_type = TYPE_VIRTIO_BUS;
2289 dc->props = virtio_properties;
2290 vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
2291 vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;
2292
2293 vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
2294 }
2295
2296 bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
2297 {
2298 BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
2299 VirtioBusState *vbus = VIRTIO_BUS(qbus);
2300
2301 return virtio_bus_ioeventfd_enabled(vbus);
2302 }
2303
2304 static const TypeInfo virtio_device_info = {
2305 .name = TYPE_VIRTIO_DEVICE,
2306 .parent = TYPE_DEVICE,
2307 .instance_size = sizeof(VirtIODevice),
2308 .class_init = virtio_device_class_init,
2309 .abstract = true,
2310 .class_size = sizeof(VirtioDeviceClass),
2311 };
2312
2313 static void virtio_register_types(void)
2314 {
2315 type_register_static(&virtio_device_info);
2316 }
2317
2318 type_init(virtio_register_types)