virtio-blk: Use VirtIOBlockReq.in to drop VirtIOBlockReq.inhdr
[qemu.git] / hw / block / dataplane / virtio-blk.c
1 /*
2 * Dedicated thread for virtio-blk I/O processing
3 *
4 * Copyright 2012 IBM, Corp.
5 * Copyright 2012 Red Hat, Inc. and/or its affiliates
6 *
7 * Authors:
8 * Stefan Hajnoczi <stefanha@redhat.com>
9 *
10 * This work is licensed under the terms of the GNU GPL, version 2 or later.
11 * See the COPYING file in the top-level directory.
12 *
13 */
14
15 #include "trace.h"
16 #include "qemu/iov.h"
17 #include "qemu/thread.h"
18 #include "qemu/error-report.h"
19 #include "hw/virtio/dataplane/vring.h"
20 #include "block/block.h"
21 #include "hw/virtio/virtio-blk.h"
22 #include "virtio-blk.h"
23 #include "block/aio.h"
24 #include "hw/virtio/virtio-bus.h"
25 #include "qom/object_interfaces.h"
26
struct VirtIOBlockDataPlane {
    bool started;     /* dataplane is fully set up and processing requests */
    bool starting;    /* guards against re-entrant virtio_blk_data_plane_start() */
    bool stopping;    /* guards against re-entrant virtio_blk_data_plane_stop() */

    VirtIOBlkConf *blk;     /* device configuration (backend bs, serial, ...) */

    VirtIODevice *vdev;
    Vring vring;                    /* virtqueue vring */
    EventNotifier *guest_notifier;  /* irq */

    /* Note that these EventNotifiers are assigned by value.  This is
     * fine as long as you do not call event_notifier_cleanup on them
     * (because you don't own the file descriptor or handle; you just
     * use it).
     */
    IOThread *iothread;                 /* thread that runs the AioContext */
    IOThread internal_iothread_obj;     /* used when no iothread was given */
    AioContext *ctx;
    EventNotifier host_notifier;        /* doorbell */

    /* Operation blocker on BDS */
    Error *blocker;
};
51
52 /* Raise an interrupt to signal guest, if necessary */
53 static void notify_guest(VirtIOBlockDataPlane *s)
54 {
55 if (!vring_should_notify(s->vdev, &s->vring)) {
56 return;
57 }
58
59 event_notifier_set(s->guest_notifier);
60 }
61
62 static void complete_rdwr(void *opaque, int ret)
63 {
64 VirtIOBlockReq *req = opaque;
65 struct virtio_blk_inhdr hdr;
66 int len;
67
68 if (likely(ret == 0)) {
69 hdr.status = VIRTIO_BLK_S_OK;
70 len = req->qiov.size;
71 } else {
72 hdr.status = VIRTIO_BLK_S_IOERR;
73 len = 0;
74 }
75
76 trace_virtio_blk_data_plane_complete_request(req->dev->dataplane,
77 req->elem->index, ret);
78
79 stb_p(&req->in->status, hdr.status);
80
81 /* According to the virtio specification len should be the number of bytes
82 * written to, but for virtio-blk it seems to be the number of bytes
83 * transferred plus the status bytes.
84 */
85 vring_push(&req->dev->dataplane->vring, req->elem, len + sizeof(hdr));
86 notify_guest(req->dev->dataplane);
87 g_slice_free(VirtIOBlockReq, req);
88 }
89
/* Complete a request synchronously: write the status byte into the
 * guest-visible inhdr, push the element onto the used ring (only the status
 * byte counts as written data, hence sizeof(*inhdr)) and raise the guest
 * interrupt if necessary.
 */
static void complete_request_early(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
                                   struct virtio_blk_inhdr *inhdr,
                                   unsigned char status)
{
    /* stb_p() stores the byte via QEMU's guest-memory-safe accessor */
    stb_p(&inhdr->status, status);

    vring_push(&s->vring, elem, sizeof(*inhdr));
    notify_guest(s);
}
99
100 /* Get disk serial number */
101 static void do_get_id_cmd(VirtIOBlockDataPlane *s,
102 struct iovec *iov, unsigned int iov_cnt,
103 VirtQueueElement *elem,
104 struct virtio_blk_inhdr *inhdr)
105 {
106 char id[VIRTIO_BLK_ID_BYTES];
107
108 /* Serial number not NUL-terminated when longer than buffer */
109 strncpy(id, s->blk->serial ? s->blk->serial : "", sizeof(id));
110 iov_from_buf(iov, iov_cnt, 0, id, sizeof(id));
111 complete_request_early(s, elem, inhdr, VIRTIO_BLK_S_OK);
112 }
113
114 static void do_rdwr_cmd(VirtIOBlockDataPlane *s, bool read,
115 struct iovec *iov, unsigned iov_cnt,
116 int64_t sector_num, VirtQueueElement *elem,
117 struct virtio_blk_inhdr *inhdr)
118 {
119 VirtIOBlock *dev = VIRTIO_BLK(s->vdev);
120 VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq);
121 QEMUIOVector *qiov;
122 int nb_sectors;
123
124 /* Fill in virtio block metadata needed for completion */
125 req->elem = elem;
126 req->dev = dev;
127 req->in = inhdr;
128 qemu_iovec_init_external(&req->qiov, iov, iov_cnt);
129
130 qiov = &req->qiov;
131
132 nb_sectors = qiov->size / BDRV_SECTOR_SIZE;
133
134 if (read) {
135 bdrv_aio_readv(s->blk->conf.bs, sector_num, qiov, nb_sectors,
136 complete_rdwr, req);
137 } else {
138 bdrv_aio_writev(s->blk->conf.bs, sector_num, qiov, nb_sectors,
139 complete_rdwr, req);
140 }
141 }
142
143 static void complete_flush(void *opaque, int ret)
144 {
145 VirtIOBlockReq *req = opaque;
146 unsigned char status;
147
148 if (ret == 0) {
149 status = VIRTIO_BLK_S_OK;
150 } else {
151 status = VIRTIO_BLK_S_IOERR;
152 }
153
154 complete_request_early(req->dev->dataplane, req->elem, req->in, status);
155 g_slice_free(VirtIOBlockReq, req);
156 }
157
158 static void do_flush_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
159 struct virtio_blk_inhdr *inhdr)
160 {
161 VirtIOBlock *dev = VIRTIO_BLK(s->vdev);
162 VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq);
163 req->dev = dev;
164 req->elem = elem;
165 req->in = inhdr;
166
167 bdrv_aio_flush(s->blk->conf.bs, complete_flush, req);
168 }
169
170 static void do_scsi_cmd(VirtIOBlockDataPlane *s, VirtQueueElement *elem,
171 struct virtio_blk_inhdr *inhdr)
172 {
173 int status;
174
175 status = virtio_blk_handle_scsi_req(VIRTIO_BLK(s->vdev), elem);
176 complete_request_early(s, elem, inhdr, status);
177 }
178
179 static int process_request(VirtIOBlockDataPlane *s, VirtQueueElement *elem)
180 {
181 struct iovec *iov = elem->out_sg;
182 struct iovec *in_iov = elem->in_sg;
183 unsigned out_num = elem->out_num;
184 unsigned in_num = elem->in_num;
185 struct virtio_blk_outhdr outhdr;
186 struct virtio_blk_inhdr *inhdr;
187
188 /* Copy in outhdr */
189 if (unlikely(iov_to_buf(iov, out_num, 0, &outhdr,
190 sizeof(outhdr)) != sizeof(outhdr))) {
191 error_report("virtio-blk request outhdr too short");
192 return -EFAULT;
193 }
194 iov_discard_front(&iov, &out_num, sizeof(outhdr));
195
196 /* We are likely safe with the iov_len check, because inhdr is only 1 byte,
197 * but checking here in case the header gets bigger in the future. */
198 if (in_num < 1 || in_iov[in_num - 1].iov_len < sizeof(*inhdr)) {
199 error_report("virtio-blk request inhdr too short");
200 return -EFAULT;
201 }
202
203 /* Grab inhdr for later */
204 inhdr = (void *)in_iov[in_num - 1].iov_base
205 + in_iov[in_num - 1].iov_len - sizeof(*inhdr);
206 iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
207
208 /* TODO Linux sets the barrier bit even when not advertised! */
209 outhdr.type &= ~VIRTIO_BLK_T_BARRIER;
210
211 switch (outhdr.type) {
212 case VIRTIO_BLK_T_IN:
213 do_rdwr_cmd(s, true, in_iov, in_num,
214 outhdr.sector * 512 / BDRV_SECTOR_SIZE,
215 elem, inhdr);
216 return 0;
217
218 case VIRTIO_BLK_T_OUT:
219 do_rdwr_cmd(s, false, iov, out_num,
220 outhdr.sector * 512 / BDRV_SECTOR_SIZE,
221 elem, inhdr);
222 return 0;
223
224 case VIRTIO_BLK_T_SCSI_CMD:
225 do_scsi_cmd(s, elem, inhdr);
226 return 0;
227
228 case VIRTIO_BLK_T_FLUSH:
229 do_flush_cmd(s, elem, inhdr);
230 return 0;
231
232 case VIRTIO_BLK_T_GET_ID:
233 do_get_id_cmd(s, in_iov, in_num, elem, inhdr);
234 return 0;
235
236 default:
237 error_report("virtio-blk unsupported request type %#x", outhdr.type);
238 return -EFAULT;
239 }
240 }
241
/* Host notifier (doorbell) handler: drain the vring and dispatch requests.
 *
 * Runs in the dataplane AioContext.  Uses the standard virtio
 * notification-suppression dance: notifications from the guest are disabled
 * while we process, then re-enabled once the vring is empty; if the guest
 * snuck in more descriptors in that window, we loop and process them too.
 */
static void handle_notify(EventNotifier *e)
{
    VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane,
                                           host_notifier);

    VirtQueueElement *elem;
    int ret;

    /* Clear the doorbell first so a new kick after this point re-arms us */
    event_notifier_test_and_clear(&s->host_notifier);
    for (;;) {
        /* Disable guest->host notifies to avoid unnecessary vmexits */
        vring_disable_notification(s->vdev, &s->vring);

        for (;;) {
            ret = vring_pop(s->vdev, &s->vring, &elem);
            if (ret < 0) {
                assert(elem == NULL);
                break; /* no more requests */
            }

            trace_virtio_blk_data_plane_process_request(s, elem->out_num,
                                                        elem->in_num, elem->index);

            if (process_request(s, elem) < 0) {
                /* Malformed request: stop using this vring entirely */
                vring_set_broken(&s->vring);
                vring_free_element(elem);
                ret = -EFAULT;
                break;
            }
        }

        if (likely(ret == -EAGAIN)) { /* vring emptied */
            /* Re-enable guest->host notifies and stop processing the vring.
             * But if the guest has snuck in more descriptors, keep processing.
             */
            if (vring_enable_notification(s->vdev, &s->vring)) {
                break;
            }
        } else { /* fatal error */
            break;
        }
    }
}
285
286 /* Context: QEMU global mutex held */
/* Context: QEMU global mutex held */
/* Allocate and initialize the dataplane state for @vdev.
 *
 * On success *dataplane points at the new state; on failure (or when
 * dataplane is not configured) *dataplane stays NULL.
 *
 * NOTE(review): the @errp parameter is currently never written to — errors
 * are printed with error_report() and the function returns silently.  Verify
 * whether callers expect errors via errp.
 */
void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *blk,
                                  VirtIOBlockDataPlane **dataplane,
                                  Error **errp)
{
    VirtIOBlockDataPlane *s;
    Error *local_err = NULL;

    *dataplane = NULL;

    /* Nothing to do unless the dataplane feature was requested */
    if (!blk->data_plane) {
        return;
    }

    /* If dataplane is (re-)enabled while the guest is running there could be
     * block jobs that can conflict.
     */
    if (bdrv_op_is_blocked(blk->conf.bs, BLOCK_OP_TYPE_DATAPLANE, &local_err)) {
        error_report("cannot start dataplane thread: %s",
                     error_get_pretty(local_err));
        error_free(local_err);
        return;
    }

    s = g_new0(VirtIOBlockDataPlane, 1);
    s->vdev = vdev;
    s->blk = blk;

    if (blk->iothread) {
        /* Use the user-specified iothread; take a reference so it outlives us */
        s->iothread = blk->iothread;
        object_ref(OBJECT(s->iothread));
    } else {
        /* Create per-device IOThread if none specified.  This is for
         * x-data-plane option compatibility.  If x-data-plane is removed we
         * can drop this.
         */
        object_initialize(&s->internal_iothread_obj,
                          sizeof(s->internal_iothread_obj),
                          TYPE_IOTHREAD);
        user_creatable_complete(OBJECT(&s->internal_iothread_obj), &error_abort);
        s->iothread = &s->internal_iothread_obj;
    }
    s->ctx = iothread_get_aio_context(s->iothread);

    /* Prevent conflicting block layer operations while dataplane owns the BDS */
    error_setg(&s->blocker, "block device is in use by data plane");
    bdrv_op_block_all(blk->conf.bs, s->blocker);

    *dataplane = s;
}
335
336 /* Context: QEMU global mutex held */
/* Context: QEMU global mutex held */
/* Tear down dataplane state created by virtio_blk_data_plane_create().
 * Safe to call with s == NULL.  Stops the dataplane first, then releases
 * the op blocker and the iothread reference.
 */
void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
{
    if (!s) {
        return;
    }

    virtio_blk_data_plane_stop(s);
    bdrv_op_unblock_all(s->blk->conf.bs, s->blocker);
    error_free(s->blocker);
    object_unref(OBJECT(s->iothread));
    g_free(s);
}
349
350 /* Context: QEMU global mutex held */
/* Context: QEMU global mutex held */
/* Start dataplane processing: map the vring, wire up guest (irq) and host
 * (doorbell) notifiers, move the BDS into the dataplane AioContext and
 * register the doorbell handler there.  Idempotent while started/starting.
 */
void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtQueue *vq;

    if (s->started) {
        return;
    }

    if (s->starting) {
        return;
    }

    s->starting = true;

    vq = virtio_get_queue(s->vdev, 0);
    if (!vring_setup(&s->vring, s->vdev, 0)) {
        /* vring mapping failed; leave dataplane disabled */
        s->starting = false;
        return;
    }

    /* Set up guest notifier (irq) */
    if (k->set_guest_notifiers(qbus->parent, 1, true) != 0) {
        fprintf(stderr, "virtio-blk failed to set guest notifier, "
                "ensure -enable-kvm is set\n");
        exit(1);
    }
    s->guest_notifier = virtio_queue_get_guest_notifier(vq);

    /* Set up virtqueue notify */
    if (k->set_host_notifier(qbus->parent, 0, true) != 0) {
        fprintf(stderr, "virtio-blk failed to set host notifier\n");
        exit(1);
    }
    s->host_notifier = *virtio_queue_get_host_notifier(vq);

    s->starting = false;
    s->started = true;
    trace_virtio_blk_data_plane_start(s);

    /* From here on, requests are serviced in the iothread's AioContext */
    bdrv_set_aio_context(s->blk->conf.bs, s->ctx);

    /* Kick right away to begin processing requests already in vring */
    event_notifier_set(virtio_queue_get_host_notifier(vq));

    /* Get this show started by hooking up our callbacks */
    aio_context_acquire(s->ctx);
    aio_set_event_notifier(s->ctx, &s->host_notifier, handle_notify);
    aio_context_release(s->ctx);
}
402
403 /* Context: QEMU global mutex held */
/* Context: QEMU global mutex held */
/* Stop dataplane processing and hand the device back to the main loop.
 * The teardown order mirrors start in reverse: unregister the doorbell
 * handler, drain and move the BDS, sync vring state back to the virtqueue,
 * then release host and guest notifiers.  Idempotent while stopped/stopping.
 */
void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
{
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    if (!s->started || s->stopping) {
        return;
    }
    s->stopping = true;
    trace_virtio_blk_data_plane_stop(s);

    aio_context_acquire(s->ctx);

    /* Stop notifications for new requests from guest */
    aio_set_event_notifier(s->ctx, &s->host_notifier, NULL);

    /* Drain and switch bs back to the QEMU main loop */
    bdrv_set_aio_context(s->blk->conf.bs, qemu_get_aio_context());

    aio_context_release(s->ctx);

    /* Sync vring state back to virtqueue so that non-dataplane request
     * processing can continue when we disable the host notifier below.
     */
    vring_teardown(&s->vring, s->vdev, 0);

    k->set_host_notifier(qbus->parent, 0, false);

    /* Clean up guest notifier (irq) */
    k->set_guest_notifiers(qbus->parent, 1, false);

    s->started = false;
    s->stopping = false;
}