util/vhost-user-server: drop unused DevicePanicNotifier
[qemu.git] / block / export / vhost-user-blk-server.c
1 /*
 * Sharing QEMU block devices via vhost-user protocol
3 *
4 * Parts of the code based on nbd/server.c.
5 *
6 * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
7 * Copyright (c) 2020 Red Hat, Inc.
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or
10 * later. See the COPYING file in the top-level directory.
11 */
12 #include "qemu/osdep.h"
13 #include "block/block.h"
14 #include "vhost-user-blk-server.h"
15 #include "qapi/error.h"
16 #include "qom/object_interfaces.h"
17 #include "sysemu/block-backend.h"
18 #include "util/block-helpers.h"
19
enum {
    /* Only a single request virtqueue is supported for now */
    VHOST_USER_BLK_MAX_QUEUES = 1,
};
/* Device-writable status byte placed after the data payload (virtio-blk) */
struct virtio_blk_inhdr {
    unsigned char status;
};

/*
 * Per-request state. "elem" must stay the first member: vu_queue_pop()
 * returns the popped element pointer and it is used directly as VuBlockReq*.
 */
typedef struct VuBlockReq {
    VuVirtqElement elem;            /* descriptor chain popped from the vq */
    int64_t sector_num;             /* start sector from the request header */
    size_t size;                    /* payload bytes (status byte excluded) */
    struct virtio_blk_inhdr *in;    /* points at the guest-visible status */
    struct virtio_blk_outhdr out;   /* request header copied from the guest */
    VuServer *server;               /* owning vhost-user server */
    struct VuVirtq *vq;             /* queue the request came from */
} VuBlockReq;
36
37 static void vu_block_req_complete(VuBlockReq *req)
38 {
39 VuDev *vu_dev = &req->server->vu_dev;
40
41 /* IO size with 1 extra status byte */
42 vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
43 vu_queue_notify(vu_dev, req->vq);
44
45 free(req);
46 }
47
48 static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
49 {
50 return container_of(server, VuBlockDev, vu_server);
51 }
52
53 static int coroutine_fn
54 vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
55 uint32_t iovcnt, uint32_t type)
56 {
57 struct virtio_blk_discard_write_zeroes desc;
58 ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
59 if (unlikely(size != sizeof(desc))) {
60 error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
61 return -EINVAL;
62 }
63
64 VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
65 uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
66 le32_to_cpu(desc.num_sectors) << 9 };
67 if (type == VIRTIO_BLK_T_DISCARD) {
68 if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
69 return 0;
70 }
71 } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
72 if (blk_co_pwrite_zeroes(vdev_blk->backend,
73 range[0], range[1], 0) == 0) {
74 return 0;
75 }
76 }
77
78 return -EINVAL;
79 }
80
81 static void coroutine_fn vu_block_flush(VuBlockReq *req)
82 {
83 VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
84 BlockBackend *backend = vdev_blk->backend;
85 blk_co_flush(backend);
86 }
87
88 static void coroutine_fn vu_block_virtio_process_req(void *opaque)
89 {
90 VuBlockReq *req = opaque;
91 VuServer *server = req->server;
92 VuVirtqElement *elem = &req->elem;
93 uint32_t type;
94
95 VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
96 BlockBackend *backend = vdev_blk->backend;
97
98 struct iovec *in_iov = elem->in_sg;
99 struct iovec *out_iov = elem->out_sg;
100 unsigned in_num = elem->in_num;
101 unsigned out_num = elem->out_num;
102
103 /* refer to hw/block/virtio_blk.c */
104 if (elem->out_num < 1 || elem->in_num < 1) {
105 error_report("virtio-blk request missing headers");
106 goto err;
107 }
108
109 if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
110 sizeof(req->out)) != sizeof(req->out))) {
111 error_report("virtio-blk request outhdr too short");
112 goto err;
113 }
114
115 iov_discard_front(&out_iov, &out_num, sizeof(req->out));
116
117 if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
118 error_report("virtio-blk request inhdr too short");
119 goto err;
120 }
121
122 /* We always touch the last byte, so just see how big in_iov is. */
123 req->in = (void *)in_iov[in_num - 1].iov_base
124 + in_iov[in_num - 1].iov_len
125 - sizeof(struct virtio_blk_inhdr);
126 iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
127
128 type = le32_to_cpu(req->out.type);
129 switch (type & ~VIRTIO_BLK_T_BARRIER) {
130 case VIRTIO_BLK_T_IN:
131 case VIRTIO_BLK_T_OUT: {
132 ssize_t ret = 0;
133 bool is_write = type & VIRTIO_BLK_T_OUT;
134 req->sector_num = le64_to_cpu(req->out.sector);
135
136 int64_t offset = req->sector_num * vdev_blk->blk_size;
137 QEMUIOVector qiov;
138 if (is_write) {
139 qemu_iovec_init_external(&qiov, out_iov, out_num);
140 ret = blk_co_pwritev(backend, offset, qiov.size,
141 &qiov, 0);
142 } else {
143 qemu_iovec_init_external(&qiov, in_iov, in_num);
144 ret = blk_co_preadv(backend, offset, qiov.size,
145 &qiov, 0);
146 }
147 if (ret >= 0) {
148 req->in->status = VIRTIO_BLK_S_OK;
149 } else {
150 req->in->status = VIRTIO_BLK_S_IOERR;
151 }
152 break;
153 }
154 case VIRTIO_BLK_T_FLUSH:
155 vu_block_flush(req);
156 req->in->status = VIRTIO_BLK_S_OK;
157 break;
158 case VIRTIO_BLK_T_GET_ID: {
159 size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
160 VIRTIO_BLK_ID_BYTES);
161 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
162 req->in->status = VIRTIO_BLK_S_OK;
163 req->size = elem->in_sg[0].iov_len;
164 break;
165 }
166 case VIRTIO_BLK_T_DISCARD:
167 case VIRTIO_BLK_T_WRITE_ZEROES: {
168 int rc;
169 rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
170 out_num, type);
171 if (rc == 0) {
172 req->in->status = VIRTIO_BLK_S_OK;
173 } else {
174 req->in->status = VIRTIO_BLK_S_IOERR;
175 }
176 break;
177 }
178 default:
179 req->in->status = VIRTIO_BLK_S_UNSUPP;
180 break;
181 }
182
183 vu_block_req_complete(req);
184 return;
185
186 err:
187 free(elem);
188 }
189
190 static void vu_block_process_vq(VuDev *vu_dev, int idx)
191 {
192 VuServer *server = container_of(vu_dev, VuServer, vu_dev);
193 VuVirtq *vq = vu_get_queue(vu_dev, idx);
194
195 while (1) {
196 VuBlockReq *req;
197
198 req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
199 if (!req) {
200 break;
201 }
202
203 req->server = server;
204 req->vq = vq;
205
206 Coroutine *co =
207 qemu_coroutine_create(vu_block_virtio_process_req, req);
208 qemu_coroutine_enter(co);
209 }
210 }
211
212 static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
213 {
214 VuVirtq *vq;
215
216 assert(vu_dev);
217
218 vq = vu_get_queue(vu_dev, idx);
219 vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
220 }
221
222 static uint64_t vu_block_get_features(VuDev *dev)
223 {
224 uint64_t features;
225 VuServer *server = container_of(dev, VuServer, vu_dev);
226 VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
227 features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
228 1ull << VIRTIO_BLK_F_SEG_MAX |
229 1ull << VIRTIO_BLK_F_TOPOLOGY |
230 1ull << VIRTIO_BLK_F_BLK_SIZE |
231 1ull << VIRTIO_BLK_F_FLUSH |
232 1ull << VIRTIO_BLK_F_DISCARD |
233 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
234 1ull << VIRTIO_BLK_F_CONFIG_WCE |
235 1ull << VIRTIO_F_VERSION_1 |
236 1ull << VIRTIO_RING_F_INDIRECT_DESC |
237 1ull << VIRTIO_RING_F_EVENT_IDX |
238 1ull << VHOST_USER_F_PROTOCOL_FEATURES;
239
240 if (!vdev_blk->writable) {
241 features |= 1ull << VIRTIO_BLK_F_RO;
242 }
243
244 return features;
245 }
246
247 static uint64_t vu_block_get_protocol_features(VuDev *dev)
248 {
249 return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
250 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
251 }
252
253 static int
254 vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
255 {
256 VuServer *server = container_of(vu_dev, VuServer, vu_dev);
257 VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
258 memcpy(config, &vdev_blk->blkcfg, len);
259
260 return 0;
261 }
262
263 static int
264 vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
265 uint32_t offset, uint32_t size, uint32_t flags)
266 {
267 VuServer *server = container_of(vu_dev, VuServer, vu_dev);
268 VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
269 uint8_t wce;
270
271 /* don't support live migration */
272 if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
273 return -EINVAL;
274 }
275
276 if (offset != offsetof(struct virtio_blk_config, wce) ||
277 size != 1) {
278 return -EINVAL;
279 }
280
281 wce = *data;
282 vdev_blk->blkcfg.wce = wce;
283 blk_set_enable_write_cache(vdev_blk->backend, wce);
284 return 0;
285 }
286
/*
 * When the client disconnects, it sends a VHOST_USER_NONE request.
 * vu_process_message() would simply call exit(), which makes the VM
 * terminate abruptly. To avoid this, handle VHOST_USER_NONE here,
 * before vu_process_message() gets to see it.
 */
295 static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
296 {
297 if (vmsg->request == VHOST_USER_NONE) {
298 dev->panic(dev, "disconnect");
299 return true;
300 }
301 return false;
302 }
303
/* Device callbacks handed to libvhost-user for this export */
static const VuDevIface vu_block_iface = {
    .get_features = vu_block_get_features,
    .queue_set_started = vu_block_queue_set_started,
    .get_protocol_features = vu_block_get_protocol_features,
    .get_config = vu_block_get_config,
    .set_config = vu_block_set_config,
    .process_msg = vu_block_process_msg,
};
312
313 static void blk_aio_attached(AioContext *ctx, void *opaque)
314 {
315 VuBlockDev *vub_dev = opaque;
316 aio_context_acquire(ctx);
317 vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
318 aio_context_release(ctx);
319 }
320
321 static void blk_aio_detach(void *opaque)
322 {
323 VuBlockDev *vub_dev = opaque;
324 AioContext *ctx = vub_dev->vu_server.ctx;
325 aio_context_acquire(ctx);
326 vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
327 aio_context_release(ctx);
328 }
329
330 static void
331 vu_block_initialize_config(BlockDriverState *bs,
332 struct virtio_blk_config *config, uint32_t blk_size)
333 {
334 config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
335 config->blk_size = blk_size;
336 config->size_max = 0;
337 config->seg_max = 128 - 2;
338 config->min_io_size = 1;
339 config->opt_io_size = 1;
340 config->num_queues = VHOST_USER_BLK_MAX_QUEUES;
341 config->max_discard_sectors = 32768;
342 config->max_discard_seg = 1;
343 config->discard_sector_alignment = config->blk_size >> 9;
344 config->max_write_zeroes_sectors = 32768;
345 config->max_write_zeroes_seg = 1;
346 }
347
/*
 * Look up the block node named by vu_block_device->node_name and prepare
 * it for export: activate it, insert it into a fresh BlockBackend with
 * the right permissions, and fill in the virtio-blk config space.
 *
 * Returns vu_block_device on success, NULL on failure (errp set).
 */
static VuBlockDev *vu_block_init(VuBlockDev *vu_block_device, Error **errp)
{

    BlockBackend *blk;
    Error *local_error = NULL;
    const char *node_name = vu_block_device->node_name;
    bool writable = vu_block_device->writable;
    uint64_t perm = BLK_PERM_CONSISTENT_READ;
    int ret;

    AioContext *ctx;

    BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error);

    if (!bs) {
        error_propagate(errp, local_error);
        return NULL;
    }

    /* a read-only node silently downgrades the export to read-only */
    if (bdrv_is_read_only(bs)) {
        writable = false;
    }

    if (writable) {
        perm |= BLK_PERM_WRITE;
    }

    /* NOTE(review): invalidate errors are discarded here — confirm intended */
    ctx = bdrv_get_aio_context(bs);
    aio_context_acquire(ctx);
    bdrv_invalidate_cache(bs, NULL);
    aio_context_release(ctx);

    /*
     * Don't allow resize while the vhost user server is running,
     * otherwise we don't care what happens with the node.
     */
    blk = blk_new(bdrv_get_aio_context(bs), perm,
                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                  BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
    ret = blk_insert_bs(blk, bs, errp);

    if (ret < 0) {
        goto fail;
    }

    blk_set_enable_write_cache(blk, false);

    /* the server follows the node across AioContexts (see blk_aio_*) */
    blk_set_allow_aio_context_change(blk, true);

    vu_block_device->blkcfg.wce = 0;
    vu_block_device->backend = blk;
    if (!vu_block_device->blk_size) {
        /* default logical block size is 512 bytes */
        vu_block_device->blk_size = BDRV_SECTOR_SIZE;
    }
    vu_block_device->blkcfg.blk_size = vu_block_device->blk_size;
    blk_set_guest_block_size(blk, vu_block_device->blk_size);
    vu_block_initialize_config(bs, &vu_block_device->blkcfg,
                               vu_block_device->blk_size);
    return vu_block_device;

fail:
    blk_unref(blk);
    return NULL;
}
412
413 static void vu_block_deinit(VuBlockDev *vu_block_device)
414 {
415 if (vu_block_device->backend) {
416 blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
417 blk_aio_detach, vu_block_device);
418 }
419
420 blk_unref(vu_block_device->backend);
421 }
422
423 static void vhost_user_blk_server_stop(VuBlockDev *vu_block_device)
424 {
425 vhost_user_server_stop(&vu_block_device->vu_server);
426 vu_block_deinit(vu_block_device);
427 }
428
429 static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
430 Error **errp)
431 {
432 AioContext *ctx;
433 SocketAddress *addr = vu_block_device->addr;
434
435 if (!vu_block_init(vu_block_device, errp)) {
436 return;
437 }
438
439 ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
440
441 if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
442 VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
443 errp)) {
444 goto error;
445 }
446
447 blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
448 blk_aio_detach, vu_block_device);
449 vu_block_device->running = true;
450 return;
451
452 error:
453 vu_block_deinit(vu_block_device);
454 }
455
456 static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp)
457 {
458 if (vus->running) {
459 error_setg(errp, "The property can't be modified "
460 "while the server is running");
461 return false;
462 }
463 return true;
464 }
465
466 static void vu_set_node_name(Object *obj, const char *value, Error **errp)
467 {
468 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
469
470 if (!vu_prop_modifiable(vus, errp)) {
471 return;
472 }
473
474 if (vus->node_name) {
475 g_free(vus->node_name);
476 }
477
478 vus->node_name = g_strdup(value);
479 }
480
481 static char *vu_get_node_name(Object *obj, Error **errp)
482 {
483 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
484 return g_strdup(vus->node_name);
485 }
486
487 static void free_socket_addr(SocketAddress *addr)
488 {
489 g_free(addr->u.q_unix.path);
490 g_free(addr);
491 }
492
493 static void vu_set_unix_socket(Object *obj, const char *value,
494 Error **errp)
495 {
496 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
497
498 if (!vu_prop_modifiable(vus, errp)) {
499 return;
500 }
501
502 if (vus->addr) {
503 free_socket_addr(vus->addr);
504 }
505
506 SocketAddress *addr = g_new0(SocketAddress, 1);
507 addr->type = SOCKET_ADDRESS_TYPE_UNIX;
508 addr->u.q_unix.path = g_strdup(value);
509 vus->addr = addr;
510 }
511
512 static char *vu_get_unix_socket(Object *obj, Error **errp)
513 {
514 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
515 return g_strdup(vus->addr->u.q_unix.path);
516 }
517
518 static bool vu_get_block_writable(Object *obj, Error **errp)
519 {
520 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
521 return vus->writable;
522 }
523
524 static void vu_set_block_writable(Object *obj, bool value, Error **errp)
525 {
526 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
527
528 if (!vu_prop_modifiable(vus, errp)) {
529 return;
530 }
531
532 vus->writable = value;
533 }
534
535 static void vu_get_blk_size(Object *obj, Visitor *v, const char *name,
536 void *opaque, Error **errp)
537 {
538 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
539 uint32_t value = vus->blk_size;
540
541 visit_type_uint32(v, name, &value, errp);
542 }
543
544 static void vu_set_blk_size(Object *obj, Visitor *v, const char *name,
545 void *opaque, Error **errp)
546 {
547 VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
548
549 Error *local_err = NULL;
550 uint32_t value;
551
552 if (!vu_prop_modifiable(vus, errp)) {
553 return;
554 }
555
556 visit_type_uint32(v, name, &value, &local_err);
557 if (local_err) {
558 goto out;
559 }
560
561 check_block_size(object_get_typename(obj), name, value, &local_err);
562 if (local_err) {
563 goto out;
564 }
565
566 vus->blk_size = value;
567
568 out:
569 error_propagate(errp, local_err);
570 }
571
572 static void vhost_user_blk_server_instance_finalize(Object *obj)
573 {
574 VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
575
576 vhost_user_blk_server_stop(vub);
577
578 /*
579 * Unlike object_property_add_str, object_class_property_add_str
580 * doesn't have a release method. Thus manual memory freeing is
581 * needed.
582 */
583 free_socket_addr(vub->addr);
584 g_free(vub->node_name);
585 }
586
587 static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp)
588 {
589 VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
590
591 vhost_user_blk_server_start(vub, errp);
592 }
593
/*
 * Class initializer: hook up the UserCreatable completion callback and
 * register the QOM properties (writable, node-name, unix-socket,
 * logical-block-size).
 */
static void vhost_user_blk_server_class_init(ObjectClass *klass,
                                             void *class_data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
    ucc->complete = vhost_user_blk_server_complete;

    object_class_property_add_bool(klass, "writable",
                                   vu_get_block_writable,
                                   vu_set_block_writable);

    object_class_property_add_str(klass, "node-name",
                                  vu_get_node_name,
                                  vu_set_node_name);

    object_class_property_add_str(klass, "unix-socket",
                                  vu_get_unix_socket,
                                  vu_set_unix_socket);

    object_class_property_add(klass, "logical-block-size", "uint32",
                              vu_get_blk_size, vu_set_blk_size,
                              NULL, NULL);
}
616
/* QOM type description for the user-creatable vhost-user-blk-server */
static const TypeInfo vhost_user_blk_server_info = {
    .name = TYPE_VHOST_USER_BLK_SERVER,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(VuBlockDev),
    .instance_finalize = vhost_user_blk_server_instance_finalize,
    .class_init = vhost_user_blk_server_class_init,
    .interfaces = (InterfaceInfo[]) {
        {TYPE_USER_CREATABLE},
        {}
    },
};
628
/* Register the vhost-user-blk-server QOM type at startup. */
static void vhost_user_blk_server_register_types(void)
{
    type_register_static(&vhost_user_blk_server_info);
}

type_init(vhost_user_blk_server_register_types)