vhost-vdpa: introduce vhost-vdpa backend
[qemu.git] / hw/virtio/vhost-vdpa.c
/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
11
12 #include "qemu/osdep.h"
13 #include <linux/vhost.h>
14 #include <linux/vfio.h>
15 #include <sys/eventfd.h>
16 #include <sys/ioctl.h>
17 #include "hw/virtio/vhost.h"
18 #include "hw/virtio/vhost-backend.h"
19 #include "hw/virtio/virtio-net.h"
20 #include "hw/virtio/vhost-vdpa.h"
21 #include "qemu/main-loop.h"
22 #include <linux/kvm.h>
23 #include "sysemu/kvm.h"
24
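/*
 * Filter out sections the vhost-vdpa listener cannot handle: regions that
 * are neither RAM nor IOMMU, and mappings in the upper half of the 64-bit
 * address space that only appear while an enabled 64-bit BAR is being sized.
 */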
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space. These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

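/*
 * Add an IOVA -> host virtual address translation to the device IOTLB by
 * writing a VHOST_IOTLB_UPDATE message to the vhost-vdpa device fd.
 */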
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

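/*
 * Remove an existing IOTLB translation for the given IOVA range by writing
 * a VHOST_IOTLB_INVALIDATE message to the vhost-vdpa device fd.
 */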
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg;
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

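/*
 * MemoryListener callback: map a newly added RAM section into the device
 * IOTLB, page-aligning the IOVA range and translating it to the host
 * virtual address that backs the region.
 */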
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost-vdpa: DMA map failed (%d)", ret);
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            return;
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * There is no container to record the first error in as VFIO does on
     * its initfn path, so there is not much we can do here other than
     * report the failure.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

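/*
 * MemoryListener callback: tear down the IOTLB entries covering a RAM
 * section that is being removed from the address space.
 */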
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    bool try_unmap = true;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (try_unmap) {
        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
        if (ret) {
            error_report("vhost-vdpa: DMA unmap failed (%d)", ret);
        }
    }

    memory_region_unref(section->mr);
}
/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping. So we cannot use the generic vhost memory listener, which
 * depends on the addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

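/*
 * Forward a vhost ioctl straight to the vhost-vdpa character device; most
 * of the backend ops below are thin wrappers around this helper.
 */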
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

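/*
 * Backend init: remember the vhost-vdpa state, cache the backend features,
 * set up the memory listener and move the device status to
 * ACKNOWLEDGE | DRIVER.
 */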
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (mem->padding) {
        return -1;
    }

    return 0;
}

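/*
 * Negotiate features: hand the acked feature bits to the device, set
 * FEATURES_OK, then read the status back to confirm the device accepted
 * them.
 */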
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

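/*
 * Enable every virtqueue owned by this vhost device via
 * VHOST_VDPA_SET_VRING_ENABLE.
 */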
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    if (config == NULL) {
        return -1;
    }
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    if (v_config == NULL) {
        return -1;
    }
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

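/*
 * Start/stop the device: on start, register the memory listener so DMA
 * mappings get installed, enable the vrings and set DRIVER_OK; on stop,
 * reset the device back to ACKNOWLEDGE | DRIVER and drop the mappings.
 */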
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

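/*
 * vhost-vdpa devices consume guest physical addresses for the vrings, so
 * return the GPAs here rather than the host virtual addresses used by
 * other vhost backends.
 */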
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

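/*
 * Backend ops table wired into the generic vhost core; ops left NULL are
 * operations this backend does not implement.
 */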
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};
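
/*
 * Usage sketch (not part of this file's build): with a vDPA device bound on
 * the host, e.g. /dev/vhost-vdpa-0, this backend is typically exercised
 * through the companion vhost-vdpa netdev:
 *
 *   qemu-system-x86_64 ... \
 *     -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0 \
 *     -device virtio-net-pci,netdev=vdpa0
 */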