/*
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "migration/blocker.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "trace.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    return llend;
}

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }

    llend = vhost_vdpa_section_end(section);
    if (int128_gt(llend, int128_make64(iova_max))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     iova_max, int128_get64(llend));
        return true;
    }

    return false;
}

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                       hwaddr size, void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova,
                             msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
                             msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}
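
/*
 * Illustrative note, not part of the original source: every DMA request
 * reaches the vDPA kernel backend as a single struct vhost_msg_v2 written
 * to the device fd. For example (values are assumptions, error handling
 * omitted):
 *
 *     vhost_vdpa_dma_map(v, 0, 0x100000, 0x10000, host_va, false);
 *
 * arrives in the kernel as a VHOST_IOTLB_UPDATE entry that translates the
 * 0x10000-byte IOVA range starting at 0x100000 to host_va with read/write
 * permission.
 */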

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                         hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
{
    int fd = v->device_fd;
    struct vhost_msg_v2 msg = {
        .type = v->msg_type,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !v->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->iotlb_batch_begin_sent = true;
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!v->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    v->iotlb_batch_begin_sent = false;
}
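
/*
 * Illustrative summary, not part of the original source: when the backend
 * advertises VHOST_BACKEND_F_IOTLB_BATCH, one memory listener transaction
 * reaches the kernel as a burst:
 *
 *     VHOST_IOTLB_BATCH_BEGIN
 *     VHOST_IOTLB_UPDATE / VHOST_IOTLB_INVALIDATE   (one per changed section)
 *     VHOST_IOTLB_BATCH_END                         (sent from .commit)
 *
 * Without that capability, each update is applied by the kernel as soon as
 * it is written.
 */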

static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    DMAMap mem_region = {};
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (v->shadow_data) {
        int r;

        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
        mem_region.size = int128_get64(llsize) - 1;
        mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly);

        r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                             int128_get64(llsize), vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail_map;
    }

    return;

fail_map:
    if (v->shadow_data) {
        vhost_iova_tree_remove(v->iova_tree, mem_region);
    }

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. At runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}
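
/*
 * Illustrative summary, not part of the original source: without shadow
 * virtqueues the device is programmed with guest physical addresses, so the
 * IOVA passed to vhost_vdpa_dma_map() is simply the section offset. With
 * v->shadow_data set, an IOVA is first allocated from v->iova_tree and that
 * translated address is what the device sees; the tree entry is removed
 * again in vhost_vdpa_listener_region_del().
 */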

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (v->shadow_data) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
                            section->offset_within_region +
                            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
        if (!result) {
            /* The memory listener map wasn't mapped */
            return;
        }
        iova = result->iova;
        vhost_iova_tree_remove(v->iova_tree, *result);
    }
    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                               int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa dma unmap error!");
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping. So we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}
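
/*
 * Illustrative usage, not part of the original source (values assumed):
 * callers rely on the -errno convention above, e.g.
 *
 *     uint8_t status = 0;
 *     int r = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
 *     if (r < 0) {
 *         error_report("GET_STATUS failed: %s", strerror(-r));
 *     }
 */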

static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

/*
 * Use this function only for requests that need to be applied once.
 * Typically such a request occurs at the beginning of operation, before
 * setting up queues. It should not be used for requests that can only be
 * issued after all queues are set, which would need to check
 * dev->vq_index_end instead.
 */
static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index == 0;
}

static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
                                       uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);

    return ret;
}

static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        VhostShadowVirtqueue *svq;

        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
        g_ptr_array_add(shadow_vqs, svq);
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot set discarding of RAM broken");
        return ret;
    }

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    vhost_vdpa_init_svq(dev, v);

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        return -1;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -1;
    }
    n->addr = addr;

    return 0;
}
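
/*
 * Background note, not part of the original source: vhost-vdpa exposes one
 * doorbell page per virtqueue through mmap() of the device fd, at offset
 * queue_index * host page size. Wrapping that page in a ram_device memory
 * region and handing it to virtio_queue_set_host_notifier_mr() lets guest
 * notifications reach the hardware directly instead of bouncing through an
 * eventfd in QEMU.
 */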

static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid several updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid several updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
            break;
        }
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;
    ram_block_discard_disable(false);

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so there is no need to forward it.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
                 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
                 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_first_dev(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;

    return 0;
}
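
/*
 * Reading aid, not part of the original source: the backend features
 * negotiated above (defined in the Linux vhost UAPI headers) are
 *
 *     VHOST_BACKEND_F_IOTLB_MSG_V2  - struct vhost_msg_v2 IOTLB messages
 *     VHOST_BACKEND_F_IOTLB_BATCH   - BATCH_BEGIN/BATCH_END grouping
 *     VHOST_BACKEND_F_IOTLB_ASID    - multiple address space IDs
 *
 * Anything the kernel does not advertise is masked out before being stored
 * in dev->backend_cap.
 */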

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
{
    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);

        vhost_svq_stop(svq);
    }
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    uint8_t status = 0;
    int ret;

    vhost_vdpa_reset_svq(v);

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev, status);
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                      int fd)
{
    trace_vhost_vdpa_set_config_call(dev, fd);
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

/**
 * Set the shadow virtqueue kick and call file descriptors in the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error pointer
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create kick event notifier");
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create call event notifier");
        goto err_init_hdev_call;
    }

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        goto err_init_set_dev_fd;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
        goto err_init_set_dev_fd;
    }

    return 0;

err_init_set_dev_fd:
    event_notifier_set_handler(&svq->hdev_call, NULL);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return r;
}
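
/*
 * Summary note, not part of the original source: the notifiers wired up
 * above sit between the device and the shadow virtqueue, not between the
 * device and the guest. hdev_kick is the fd the SVQ uses to kick the
 * device, and hdev_call is where the device signals used buffers back to
 * the SVQ; the guest-facing kick/call fds are handled later in
 * vhost_vdpa_set_vring_kick() and vhost_vdpa_set_vring_call().
 */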

/**
 * Unmap an SVQ area in the device
 */
static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
{
    const DMAMap needle = {
        .translated_addr = addr,
    };
    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size);
    if (unlikely(r < 0)) {
        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
        return;
    }

    vhost_iova_tree_remove(v->iova_tree, *result);
}

static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr);
}

/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to map; its iova member is filled in on success
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova,
                           needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->iova_tree, *needle);
    }

    return r == 0;
}

/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}
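
/*
 * Layout sketch, not part of the original source: the SVQ vrings live in
 * QEMU memory, so they are mapped into the device IOVA space as two areas:
 *
 *     driver area: descriptor table + avail ring, read-only for the device
 *     device area: used ring, writable by the device
 *
 * The IOVAs returned by vhost_iova_tree_map_alloc() are written into *addr
 * and later handed to the device by vhost_vdpa_set_vring_dev_addr() in
 * place of the guest physical addresses.
 */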

static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}

static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs_enabled) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = dev->vq_index + i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq, v->iova_tree);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}

static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        vhost_vdpa_svq_unmap_rings(dev, svq);

        event_notifier_cleanup(&svq->hdev_kick);
        event_notifier_cleanup(&svq->hdev_call);
    }
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        vhost_vdpa_svqs_stop(dev);
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        memory_listener_register(&v->listener, &address_space_memory);
        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}
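
/*
 * Ordering note, not part of the original source: ->vhost_dev_start runs
 * once per vhost_dev, but the memory listener registration and the final
 * DRIVER_OK are deferred until the last queue pair of the device
 * (vq_index_end), so a multiqueue device only starts after every virtqueue
 * has been configured. Stopping runs the same steps in reverse: reset,
 * re-ack ACKNOWLEDGE|DRIVER, then drop the listener.
 */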

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * it.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);

    /*
     * vhost-vdpa devices do not support in-flight requests. Set all of them
     * as available.
     *
     * TODO: This is ok for networking, but other kinds of devices might
     * have problems with these retransmissions.
     */
    while (virtqueue_rewind(vq, 1)) {
        continue;
    }
    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * it.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
        return 0;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        int vdpa_idx = file->index - dev->vq_index;
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

        vhost_svq_set_svq_call_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_call(dev, file);
    }
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret = vhost_vdpa_get_dev_features(dev, features);

    if (ret == 0 && v->shadow_vqs_enabled) {
        /* Add SVQ logging capabilities */
        *features |= BIT_ULL(VHOST_F_LOG_ALL);
    }

    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_backend_can_merge = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
    .vhost_set_config_call = vhost_vdpa_set_config_call,
};