Merge tag 'for_upstream' of git://git.kernel.org/pub/scm/virt/kvm/mst/qemu into staging
[qemu.git] / contrib / vhost-user-blk / vhost-user-blk.c
1 /*
2 * vhost-user-blk sample application
3 *
4 * Copyright (c) 2017 Intel Corporation. All rights reserved.
5 *
6 * Author:
7 * Changpeng Liu <changpeng.liu@intel.com>
8 *
9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
10 * implementation by:
11 * Felipe Franciosi <felipe@nutanix.com>
12 * Anthony Liguori <aliguori@us.ibm.com>
13 *
14 * This work is licensed under the terms of the GNU GPL, version 2 only.
15 * See the COPYING file in the top-level directory.
16 */
17
18 #include "qemu/osdep.h"
19 #include "standard-headers/linux/virtio_blk.h"
20 #include "contrib/libvhost-user/libvhost-user-glib.h"
21 #include "contrib/libvhost-user/libvhost-user.h"
22
23
/* Trailing status byte of a virtio-blk request, written back to the guest. */
struct virtio_blk_inhdr {
    unsigned char status;
};
27
/* vhost user block device */
typedef struct VubDev {
    VugDev parent;                    /* libvhost-user-glib base object */
    int blk_fd;                       /* fd of backing file/device; -1 when closed */
    struct virtio_blk_config blkcfg;  /* virtio config space served to the master */
    bool enable_ro;                   /* advertise VIRTIO_BLK_F_RO when set */
    char *blk_name;                   /* path of the backing file (not owned here) */
    GMainLoop *loop;                  /* main loop; quit on panic to tear down */
} VubDev;
37
/* State for one in-flight virtio-blk request. */
typedef struct VubReq {
    VuVirtqElement *elem;          /* descriptor chain popped from the vring */
    int64_t sector_num;            /* starting sector, in 512-byte units */
    size_t size;                   /* bytes transferred; used by vu_queue_push */
    struct virtio_blk_inhdr *in;   /* guest-visible status byte (last in_sg) */
    struct virtio_blk_outhdr *out; /* request header (first out_sg) */
    VubDev *vdev_blk;              /* owning device */
    struct VuVirtq *vq;            /* queue the request was popped from */
} VubReq;
47
/* refer util/iov.c: total byte length of an iovec array */
static size_t vub_iov_size(const struct iovec *iov,
                           const unsigned int iov_cnt)
{
    const struct iovec *end = iov + iov_cnt;
    size_t total = 0;

    for (const struct iovec *cur = iov; cur < end; cur++) {
        total += cur->iov_len;
    }
    return total;
}
61
62 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
63 {
64 VugDev *gdev;
65 VubDev *vdev_blk;
66
67 assert(vu_dev);
68
69 gdev = container_of(vu_dev, VugDev, parent);
70 vdev_blk = container_of(gdev, VubDev, parent);
71 if (buf) {
72 g_warning("vu_panic: %s", buf);
73 }
74
75 g_main_loop_quit(vdev_blk->loop);
76 }
77
78 static void vub_req_complete(VubReq *req)
79 {
80 VugDev *gdev = &req->vdev_blk->parent;
81 VuDev *vu_dev = &gdev->parent;
82
83 /* IO size with 1 extra status byte */
84 vu_queue_push(vu_dev, req->vq, req->elem,
85 req->size + 1);
86 vu_queue_notify(vu_dev, req->vq);
87
88 if (req->elem) {
89 free(req->elem);
90 }
91
92 g_free(req);
93 }
94
/*
 * Open the backing file or block device.  When the write cache is off
 * (wce == false) O_DIRECT is added so writes bypass the host page cache.
 * Returns an open fd, or -1 with a message on stderr.
 */
static int vub_open(const char *file_name, bool wce)
{
    int flags = wce ? O_RDWR : (O_RDWR | O_DIRECT);
    int fd = open(file_name, flags);

    if (fd < 0) {
        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
                strerror(errno));
        return -1;
    }

    return fd;
}
113
114 static ssize_t
115 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
116 {
117 VubDev *vdev_blk = req->vdev_blk;
118 ssize_t rc;
119
120 if (!iovcnt) {
121 fprintf(stderr, "Invalid Read IOV count\n");
122 return -1;
123 }
124
125 req->size = vub_iov_size(iov, iovcnt);
126 rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
127 if (rc < 0) {
128 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
129 vdev_blk->blk_name, req->sector_num, req->size,
130 strerror(errno));
131 return -1;
132 }
133
134 return rc;
135 }
136
137 static ssize_t
138 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
139 {
140 VubDev *vdev_blk = req->vdev_blk;
141 ssize_t rc;
142
143 if (!iovcnt) {
144 fprintf(stderr, "Invalid Write IOV count\n");
145 return -1;
146 }
147
148 req->size = vub_iov_size(iov, iovcnt);
149 rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
150 if (rc < 0) {
151 fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
152 vdev_blk->blk_name, req->sector_num, req->size,
153 strerror(errno));
154 return -1;
155 }
156
157 return rc;
158 }
159
160 static void
161 vub_flush(VubReq *req)
162 {
163 VubDev *vdev_blk = req->vdev_blk;
164
165 fdatasync(vdev_blk->blk_fd);
166 }
167
/*
 * Pop one request from vq, parse the virtio-blk headers, and dispatch it.
 * Returns 0 when a request was consumed, -1 when the queue is empty or the
 * request is malformed (malformed requests are dropped, not completed).
 */
static int vub_virtio_process_req(VubDev *vdev_blk,
                                  VuVirtq *vq)
{
    VugDev *gdev = &vdev_blk->parent;
    VuDev *vu_dev = &gdev->parent;
    VuVirtqElement *elem;
    uint32_t type;
    unsigned in_num;
    unsigned out_num;
    VubReq *req;

    /* Extra space after the element is reserved for request bookkeeping. */
    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    if (!elem) {
        return -1;
    }

    /* refer to hw/block/virtio_blk.c */
    if (elem->out_num < 1 || elem->in_num < 1) {
        fprintf(stderr, "virtio-blk request missing headers\n");
        free(elem);
        return -1;
    }

    req = g_new0(VubReq, 1);
    req->vdev_blk = vdev_blk;
    req->vq = vq;
    req->elem = elem;

    in_num = elem->in_num;
    out_num = elem->out_num;

    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
    /* First out segment must hold the whole request header... */
    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
        fprintf(stderr, "Invalid outhdr size\n");
        goto err;
    }
    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    out_num--;

    /* ...and the last in segment must hold the status byte. */
    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
        fprintf(stderr, "Invalid inhdr size\n");
        goto err;
    }
    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    in_num--;

    /* Header fields are little-endian on the wire (virtio 1.0). */
    type = le32toh(req->out->type);
    /* Mask off the direction and (legacy) barrier bits before dispatching. */
    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
    case VIRTIO_BLK_T_IN: {
        ssize_t ret = 0;
        bool is_write = type & VIRTIO_BLK_T_OUT;
        req->sector_num = le64toh(req->out->sector);
        if (is_write) {
            /* Data segments follow the header in out_sg. */
            ret = vub_writev(req, &elem->out_sg[1], out_num);
        } else {
            /* Data segments precede the status byte in in_sg. */
            ret = vub_readv(req, &elem->in_sg[0], in_num);
        }
        if (ret >= 0) {
            req->in->status = VIRTIO_BLK_S_OK;
        } else {
            req->in->status = VIRTIO_BLK_S_IOERR;
        }
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_FLUSH: {
        vub_flush(req);
        req->in->status = VIRTIO_BLK_S_OK;
        vub_req_complete(req);
        break;
    }
    case VIRTIO_BLK_T_GET_ID: {
        /* Copy the device ID string, truncated to the smaller of the
         * guest buffer and VIRTIO_BLK_ID_BYTES. */
        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
                          VIRTIO_BLK_ID_BYTES);
        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
        req->in->status = VIRTIO_BLK_S_OK;
        req->size = elem->in_sg[0].iov_len;
        vub_req_complete(req);
        break;
    }
    default: {
        /* Unknown request type: report unsupported rather than dropping. */
        req->in->status = VIRTIO_BLK_S_UNSUPP;
        vub_req_complete(req);
        break;
    }
    }

    return 0;

err:
    free(elem);
    g_free(req);
    return -1;
}
262
263 static void vub_process_vq(VuDev *vu_dev, int idx)
264 {
265 VugDev *gdev;
266 VubDev *vdev_blk;
267 VuVirtq *vq;
268 int ret;
269
270 if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
271 fprintf(stderr, "VQ Index out of range: %d\n", idx);
272 vub_panic_cb(vu_dev, NULL);
273 return;
274 }
275
276 gdev = container_of(vu_dev, VugDev, parent);
277 vdev_blk = container_of(gdev, VubDev, parent);
278 assert(vdev_blk);
279
280 vq = vu_get_queue(vu_dev, idx);
281 assert(vq);
282
283 while (1) {
284 ret = vub_virtio_process_req(vdev_blk, vq);
285 if (ret) {
286 break;
287 }
288 }
289 }
290
291 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
292 {
293 VuVirtq *vq;
294
295 assert(vu_dev);
296
297 vq = vu_get_queue(vu_dev, idx);
298 vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
299 }
300
301 static uint64_t
302 vub_get_features(VuDev *dev)
303 {
304 uint64_t features;
305 VugDev *gdev;
306 VubDev *vdev_blk;
307
308 gdev = container_of(dev, VugDev, parent);
309 vdev_blk = container_of(gdev, VubDev, parent);
310
311 features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
312 1ull << VIRTIO_BLK_F_SEG_MAX |
313 1ull << VIRTIO_BLK_F_TOPOLOGY |
314 1ull << VIRTIO_BLK_F_BLK_SIZE |
315 1ull << VIRTIO_BLK_F_FLUSH |
316 1ull << VIRTIO_BLK_F_CONFIG_WCE |
317 1ull << VIRTIO_F_VERSION_1 |
318 1ull << VHOST_USER_F_PROTOCOL_FEATURES;
319
320 if (vdev_blk->enable_ro) {
321 features |= 1ull << VIRTIO_BLK_F_RO;
322 }
323
324 return features;
325 }
326
327 static uint64_t
328 vub_get_protocol_features(VuDev *dev)
329 {
330 return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
331 }
332
333 static int
334 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
335 {
336 VugDev *gdev;
337 VubDev *vdev_blk;
338
339 gdev = container_of(vu_dev, VugDev, parent);
340 vdev_blk = container_of(gdev, VubDev, parent);
341 memcpy(config, &vdev_blk->blkcfg, len);
342
343 return 0;
344 }
345
/*
 * VHOST_USER_SET_CONFIG handler.  Only a 1-byte write to the
 * write-cache-enable (wce) field is accepted; a change of policy reopens
 * the backing file so its O_DIRECT flag matches the new setting.
 * Returns 0 on success, -1 for rejected or failed updates.
 */
static int
vub_set_config(VuDev *vu_dev, const uint8_t *data,
               uint32_t offset, uint32_t size, uint32_t flags)
{
    VugDev *gdev;
    VubDev *vdev_blk;
    uint8_t wce;
    int fd;

    /* don't support live migration */
    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
        return -1;
    }

    gdev = container_of(vu_dev, VugDev, parent);
    vdev_blk = container_of(gdev, VubDev, parent);

    /* Only the single wce byte is writable. */
    if (offset != offsetof(struct virtio_blk_config, wce) ||
        size != 1) {
        return -1;
    }

    wce = *data;
    if (wce == vdev_blk->blkcfg.wce) {
        /* Do nothing as same with old configuration */
        return 0;
    }

    vdev_blk->blkcfg.wce = wce;
    fprintf(stdout, "Write Cache Policy Changed\n");
    /* Close the old fd before reopening with the new cache policy. */
    if (vdev_blk->blk_fd >= 0) {
        close(vdev_blk->blk_fd);
        vdev_blk->blk_fd = -1;
    }

    fd = vub_open(vdev_blk->blk_name, wce);
    if (fd < 0) {
        fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
        vdev_blk->blk_fd = -1;
        return -1;
    }
    vdev_blk->blk_fd = fd;

    return 0;
}
391
/* Callback table handed to libvhost-user; unset callbacks use defaults. */
static const VuDevIface vub_iface = {
    .get_features = vub_get_features,
    .queue_set_started = vub_queue_set_started,
    .get_protocol_features = vub_get_protocol_features,
    .get_config = vub_get_config,
    .set_config = vub_set_config,
};
399
/*
 * Create a listening UNIX-domain stream socket bound to unix_fn.  Any
 * stale socket file at that path is removed first.  Returns the
 * listening fd, or -1 on failure.
 */
static int unix_sock_new(char *unix_fn)
{
    int sock;
    struct sockaddr_un un;
    size_t len;

    assert(unix_fn);

    sock = socket(AF_UNIX, SOCK_STREAM, 0);
    if (sock < 0) {   /* fd 0 is a valid descriptor; only < 0 is an error */
        perror("socket");
        return -1;
    }

    memset(&un, 0, sizeof(un));   /* don't hand stack garbage to bind() */
    un.sun_family = AF_UNIX;
    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    len = sizeof(un.sun_family) + strlen(un.sun_path);

    (void)unlink(unix_fn);
    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
        perror("bind");
        goto fail;
    }

    if (listen(sock, 1) < 0) {
        perror("listen");
        goto fail;
    }

    return sock;

fail:
    (void)close(sock);

    return -1;
}
436
437 static void vub_free(struct VubDev *vdev_blk)
438 {
439 if (!vdev_blk) {
440 return;
441 }
442
443 g_main_loop_unref(vdev_blk->loop);
444 if (vdev_blk->blk_fd >= 0) {
445 close(vdev_blk->blk_fd);
446 }
447 g_free(vdev_blk);
448 }
449
/*
 * Query the logical block size of the backing device via BLKSSZGET.
 * Falls back to 512 bytes when the ioctl is unavailable or fails (e.g.
 * the backend is a regular file rather than a block device).
 */
static uint32_t
vub_get_blocksize(int fd)
{
    uint32_t blocksize = 512;

#if defined(__linux__) && defined(BLKSSZGET)
    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
        return blocksize;   /* was "blocklen": an undeclared identifier */
    }
#endif

    return blocksize;
}
463
464 static void
465 vub_initialize_config(int fd, struct virtio_blk_config *config)
466 {
467 off64_t capacity;
468
469 capacity = lseek64(fd, 0, SEEK_END);
470 config->capacity = capacity >> 9;
471 config->blk_size = vub_get_blocksize(fd);
472 config->size_max = 65536;
473 config->seg_max = 128 - 2;
474 config->min_io_size = 1;
475 config->opt_io_size = 1;
476 config->num_queues = 1;
477 }
478
479 static VubDev *
480 vub_new(char *blk_file)
481 {
482 VubDev *vdev_blk;
483
484 vdev_blk = g_new0(VubDev, 1);
485 vdev_blk->loop = g_main_loop_new(NULL, FALSE);
486 vdev_blk->blk_fd = vub_open(blk_file, 0);
487 if (vdev_blk->blk_fd < 0) {
488 fprintf(stderr, "Error to open block device %s\n", blk_file);
489 vub_free(vdev_blk);
490 return NULL;
491 }
492 vdev_blk->enable_ro = false;
493 vdev_blk->blkcfg.wce = 0;
494 vdev_blk->blk_name = blk_file;
495
496 /* fill virtio_blk_config with block parameters */
497 vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
498
499 return vdev_blk;
500 }
501
int main(int argc, char **argv)
{
    int opt;
    char *unix_socket = NULL;   /* -s: UNIX socket path to listen on */
    char *blk_file = NULL;      /* -b: backing block device or file */
    bool enable_ro = false;     /* -r: export the device read-only */
    int lsock = -1, csock = -1;
    VubDev *vdev_blk = NULL;

    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
        switch (opt) {
        case 'b':
            blk_file = g_strdup(optarg);
            break;
        case 's':
            unix_socket = g_strdup(optarg);
            break;
        case 'r':
            enable_ro = true;
            break;
        case 'h':
        default:
            printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
                   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
            return 0;
        }
    }

    /* Both the socket path and the backing file are mandatory. */
    if (!unix_socket || !blk_file) {
        printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
               " | -r Enable read-only ] | [ -h ]\n", argv[0]);
        return -1;
    }

    lsock = unix_sock_new(unix_socket);
    if (lsock < 0) {
        goto err;
    }

    /* Block until the vhost-user master (e.g. QEMU) connects. */
    csock = accept(lsock, (void *)0, (void *)0);
    if (csock < 0) {
        fprintf(stderr, "Accept error %s\n", strerror(errno));
        goto err;
    }

    vdev_blk = vub_new(blk_file);
    if (!vdev_blk) {
        goto err;
    }
    if (enable_ro) {
        vdev_blk->enable_ro = true;
    }

    /* Serve vhost-user messages on csock until the main loop is quit
     * (normal teardown or vub_panic_cb). */
    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);

    g_main_loop_run(vdev_blk->loop);

    vug_deinit(&vdev_blk->parent);

err:
    /* Shared cleanup path: every release below tolerates "never created". */
    vub_free(vdev_blk);
    if (csock >= 0) {
        close(csock);
    }
    if (lsock >= 0) {
        close(lsock);
    }
    g_free(unix_socket);
    g_free(blk_file);

    return 0;
}