Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-03-16' into staging
[qemu.git] / block / block-backend.c
1 /*
2 * QEMU Block backends
3 *
4 * Copyright (C) 2014-2016 Red Hat, Inc.
5 *
6 * Authors:
7 * Markus Armbruster <armbru@redhat.com>,
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1
10 * or later. See the COPYING.LIB file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "sysemu/block-backend.h"
15 #include "block/block_int.h"
16 #include "block/blockjob.h"
17 #include "block/throttle-groups.h"
18 #include "sysemu/blockdev.h"
19 #include "sysemu/sysemu.h"
20 #include "qapi-event.h"
21 #include "qemu/id.h"
22 #include "trace.h"
23
/* How many coroutines are reserved for each attached device model */
#define COROUTINE_POOL_RESERVATION 64

/* Placeholder result that marks an emulated synchronous operation as still
 * being in progress */
#define NOT_DONE 0x7fffffff
28
29 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
30
31 struct BlockBackend {
32 char *name;
33 int refcnt;
34 BdrvChild *root;
35 DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
36 QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
37 QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
38 BlockBackendPublic public;
39
40 void *dev; /* attached device model, if any */
41 bool legacy_dev; /* true if dev is not a DeviceState */
42 /* TODO change to DeviceState when all users are qdevified */
43 const BlockDevOps *dev_ops;
44 void *dev_opaque;
45
46 /* the block size for which the guest device expects atomicity */
47 int guest_block_size;
48
49 /* If the BDS tree is removed, some of its options are stored here (which
50 * can be used to restore those options in the new BDS on insert) */
51 BlockBackendRootState root_state;
52
53 bool enable_write_cache;
54
55 /* I/O stats (display with "info blockstats"). */
56 BlockAcctStats stats;
57
58 BlockdevOnError on_read_error, on_write_error;
59 bool iostatus_enabled;
60 BlockDeviceIoStatus iostatus;
61
62 uint64_t perm;
63 uint64_t shared_perm;
64
65 bool allow_write_beyond_eof;
66
67 NotifierList remove_bs_notifiers, insert_bs_notifiers;
68 };
69
70 typedef struct BlockBackendAIOCB {
71 BlockAIOCB common;
72 BlockBackend *blk;
73 int ret;
74 } BlockBackendAIOCB;
75
76 static const AIOCBInfo block_backend_aiocb_info = {
77 .get_aio_context = blk_aiocb_get_aio_context,
78 .aiocb_size = sizeof(BlockBackendAIOCB),
79 };
80
81 static void drive_info_del(DriveInfo *dinfo);
82 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
83 static char *blk_get_attached_dev_id(BlockBackend *blk);
84
85 /* All BlockBackends */
86 static QTAILQ_HEAD(, BlockBackend) block_backends =
87 QTAILQ_HEAD_INITIALIZER(block_backends);
88
89 /* All BlockBackends referenced by the monitor and which are iterated through by
90 * blk_next() */
91 static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
92 QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
93
94 static void blk_root_inherit_options(int *child_flags, QDict *child_options,
95 int parent_flags, QDict *parent_options)
96 {
97 /* We're not supposed to call this function for root nodes */
98 abort();
99 }
100 static void blk_root_drained_begin(BdrvChild *child);
101 static void blk_root_drained_end(BdrvChild *child);
102
103 static void blk_root_change_media(BdrvChild *child, bool load);
104 static void blk_root_resize(BdrvChild *child);
105
106 static char *blk_root_get_parent_desc(BdrvChild *child)
107 {
108 BlockBackend *blk = child->opaque;
109 char *dev_id;
110
111 if (blk->name) {
112 return g_strdup(blk->name);
113 }
114
115 dev_id = blk_get_attached_dev_id(blk);
116 if (*dev_id) {
117 return dev_id;
118 } else {
119 /* TODO Callback into the BB owner for something more detailed */
120 g_free(dev_id);
121 return g_strdup("a block device");
122 }
123 }
124
125 static const char *blk_root_get_name(BdrvChild *child)
126 {
127 return blk_name(child->opaque);
128 }
129
130 static const BdrvChildRole child_root = {
131 .inherit_options = blk_root_inherit_options,
132
133 .change_media = blk_root_change_media,
134 .resize = blk_root_resize,
135 .get_name = blk_root_get_name,
136 .get_parent_desc = blk_root_get_parent_desc,
137
138 .drained_begin = blk_root_drained_begin,
139 .drained_end = blk_root_drained_end,
140 };
141
142 /*
143 * Create a new BlockBackend with a reference count of one.
144 *
145 * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
146 * to request for a block driver node that is attached to this BlockBackend.
147 * @shared_perm is a bitmask which describes which permissions may be granted
148 * to other users of the attached node.
149 * Both sets of permissions can be changed later using blk_set_perm().
150 *
151 * Return the new BlockBackend on success, null on failure.
152 */
153 BlockBackend *blk_new(uint64_t perm, uint64_t shared_perm)
154 {
155 BlockBackend *blk;
156
157 blk = g_new0(BlockBackend, 1);
158 blk->refcnt = 1;
159 blk->perm = perm;
160 blk->shared_perm = shared_perm;
161 blk_set_enable_write_cache(blk, true);
162
163 qemu_co_queue_init(&blk->public.throttled_reqs[0]);
164 qemu_co_queue_init(&blk->public.throttled_reqs[1]);
165
166 notifier_list_init(&blk->remove_bs_notifiers);
167 notifier_list_init(&blk->insert_bs_notifiers);
168
169 QTAILQ_INSERT_TAIL(&block_backends, blk, link);
170 return blk;
171 }
172
173 /*
174 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
175 *
176 * Just as with bdrv_open(), after having called this function the reference to
177 * @options belongs to the block layer (even on failure).
178 *
179 * TODO: Remove @filename and @flags; it should be possible to specify a whole
180 * BDS tree just by specifying the @options QDict (or @reference,
181 * alternatively). At the time of adding this function, this is not possible,
182 * though, so callers of this function have to be able to specify @filename and
183 * @flags.
184 */
185 BlockBackend *blk_new_open(const char *filename, const char *reference,
186 QDict *options, int flags, Error **errp)
187 {
188 BlockBackend *blk;
189 BlockDriverState *bs;
190 uint64_t perm;
191
192 /* blk_new_open() is mainly used in .bdrv_create implementations and the
193 * tools where sharing isn't a concern because the BDS stays private, so we
194 * just request permission according to the flags.
195 *
196 * The exceptions are xen_disk and blockdev_init(); in these cases, the
197 * caller of blk_new_open() doesn't make use of the permissions, but they
198 * shouldn't hurt either. We can still share everything here because the
199 * guest devices will add their own blockers if they can't share. */
200 perm = BLK_PERM_CONSISTENT_READ;
201 if (flags & BDRV_O_RDWR) {
202 perm |= BLK_PERM_WRITE;
203 }
204 if (flags & BDRV_O_RESIZE) {
205 perm |= BLK_PERM_RESIZE;
206 }
207
208 blk = blk_new(perm, BLK_PERM_ALL);
209 bs = bdrv_open(filename, reference, options, flags, errp);
210 if (!bs) {
211 blk_unref(blk);
212 return NULL;
213 }
214
215 blk->root = bdrv_root_attach_child(bs, "root", &child_root,
216 perm, BLK_PERM_ALL, blk, errp);
217 if (!blk->root) {
218 bdrv_unref(bs);
219 blk_unref(blk);
220 return NULL;
221 }
222
223 return blk;
224 }
225
226 static void blk_delete(BlockBackend *blk)
227 {
228 assert(!blk->refcnt);
229 assert(!blk->name);
230 assert(!blk->dev);
231 if (blk->root) {
232 blk_remove_bs(blk);
233 }
234 assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
235 assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
236 QTAILQ_REMOVE(&block_backends, blk, link);
237 drive_info_del(blk->legacy_dinfo);
238 block_acct_cleanup(&blk->stats);
239 g_free(blk);
240 }
241
242 static void drive_info_del(DriveInfo *dinfo)
243 {
244 if (!dinfo) {
245 return;
246 }
247 qemu_opts_del(dinfo->opts);
248 g_free(dinfo->serial);
249 g_free(dinfo);
250 }
251
252 int blk_get_refcnt(BlockBackend *blk)
253 {
254 return blk ? blk->refcnt : 0;
255 }
256
257 /*
258 * Increment @blk's reference count.
259 * @blk must not be null.
260 */
261 void blk_ref(BlockBackend *blk)
262 {
263 blk->refcnt++;
264 }
265
266 /*
267 * Decrement @blk's reference count.
268 * If this drops it to zero, destroy @blk.
269 * For convenience, do nothing if @blk is null.
270 */
271 void blk_unref(BlockBackend *blk)
272 {
273 if (blk) {
274 assert(blk->refcnt > 0);
275 if (!--blk->refcnt) {
276 blk_delete(blk);
277 }
278 }
279 }
280
281 /*
282 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
283 * ones which are hidden (i.e. are not referenced by the monitor).
284 */
285 static BlockBackend *blk_all_next(BlockBackend *blk)
286 {
287 return blk ? QTAILQ_NEXT(blk, link)
288 : QTAILQ_FIRST(&block_backends);
289 }
290
291 void blk_remove_all_bs(void)
292 {
293 BlockBackend *blk = NULL;
294
295 while ((blk = blk_all_next(blk)) != NULL) {
296 AioContext *ctx = blk_get_aio_context(blk);
297
298 aio_context_acquire(ctx);
299 if (blk->root) {
300 blk_remove_bs(blk);
301 }
302 aio_context_release(ctx);
303 }
304 }
305
306 /*
307 * Return the monitor-owned BlockBackend after @blk.
308 * If @blk is null, return the first one.
309 * Else, return @blk's next sibling, which may be null.
310 *
311 * To iterate over all BlockBackends, do
312 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
313 * ...
314 * }
315 */
316 BlockBackend *blk_next(BlockBackend *blk)
317 {
318 return blk ? QTAILQ_NEXT(blk, monitor_link)
319 : QTAILQ_FIRST(&monitor_block_backends);
320 }
321
322 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
323 * the monitor or attached to a BlockBackend */
324 BlockDriverState *bdrv_next(BdrvNextIterator *it)
325 {
326 BlockDriverState *bs;
327
328 /* First, return all root nodes of BlockBackends. In order to avoid
329 * returning a BDS twice when multiple BBs refer to it, we only return it
330 * if the BB is the first one in the parent list of the BDS. */
331 if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
332 do {
333 it->blk = blk_all_next(it->blk);
334 bs = it->blk ? blk_bs(it->blk) : NULL;
335 } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
336
337 if (bs) {
338 return bs;
339 }
340 it->phase = BDRV_NEXT_MONITOR_OWNED;
341 }
342
343 /* Then return the monitor-owned BDSes without a BB attached. Ignore all
344 * BDSes that are attached to a BlockBackend here; they have been handled
345 * by the above block already */
346 do {
347 it->bs = bdrv_next_monitor_owned(it->bs);
348 bs = it->bs;
349 } while (bs && bdrv_has_blk(bs));
350
351 return bs;
352 }
353
354 BlockDriverState *bdrv_first(BdrvNextIterator *it)
355 {
356 *it = (BdrvNextIterator) {
357 .phase = BDRV_NEXT_BACKEND_ROOTS,
358 };
359
360 return bdrv_next(it);
361 }
362
363 /*
364 * Add a BlockBackend into the list of backends referenced by the monitor, with
365 * the given @name acting as the handle for the monitor.
366 * Strictly for use by blockdev.c.
367 *
368 * @name must not be null or empty.
369 *
370 * Returns true on success and false on failure. In the latter case, an Error
371 * object is returned through @errp.
372 */
373 bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
374 {
375 assert(!blk->name);
376 assert(name && name[0]);
377
378 if (!id_wellformed(name)) {
379 error_setg(errp, "Invalid device name");
380 return false;
381 }
382 if (blk_by_name(name)) {
383 error_setg(errp, "Device with id '%s' already exists", name);
384 return false;
385 }
386 if (bdrv_find_node(name)) {
387 error_setg(errp,
388 "Device name '%s' conflicts with an existing node name",
389 name);
390 return false;
391 }
392
393 blk->name = g_strdup(name);
394 QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
395 return true;
396 }
397
398 /*
399 * Remove a BlockBackend from the list of backends referenced by the monitor.
400 * Strictly for use by blockdev.c.
401 */
402 void monitor_remove_blk(BlockBackend *blk)
403 {
404 if (!blk->name) {
405 return;
406 }
407
408 QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
409 g_free(blk->name);
410 blk->name = NULL;
411 }
412
413 /*
414 * Return @blk's name, a non-null string.
415 * Returns an empty string iff @blk is not referenced by the monitor.
416 */
417 const char *blk_name(BlockBackend *blk)
418 {
419 return blk->name ?: "";
420 }
421
422 /*
423 * Return the BlockBackend with name @name if it exists, else null.
424 * @name must not be null.
425 */
426 BlockBackend *blk_by_name(const char *name)
427 {
428 BlockBackend *blk = NULL;
429
430 assert(name);
431 while ((blk = blk_next(blk)) != NULL) {
432 if (!strcmp(name, blk->name)) {
433 return blk;
434 }
435 }
436 return NULL;
437 }
438
439 /*
440 * Return the BlockDriverState attached to @blk if any, else null.
441 */
442 BlockDriverState *blk_bs(BlockBackend *blk)
443 {
444 return blk->root ? blk->root->bs : NULL;
445 }
446
447 static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
448 {
449 BdrvChild *child;
450 QLIST_FOREACH(child, &bs->parents, next_parent) {
451 if (child->role == &child_root) {
452 return child->opaque;
453 }
454 }
455
456 return NULL;
457 }
458
459 /*
460 * Returns true if @bs has an associated BlockBackend.
461 */
462 bool bdrv_has_blk(BlockDriverState *bs)
463 {
464 return bdrv_first_blk(bs) != NULL;
465 }
466
467 /*
468 * Returns true if @bs has only BlockBackends as parents.
469 */
470 bool bdrv_is_root_node(BlockDriverState *bs)
471 {
472 BdrvChild *c;
473
474 QLIST_FOREACH(c, &bs->parents, next_parent) {
475 if (c->role != &child_root) {
476 return false;
477 }
478 }
479
480 return true;
481 }
482
483 /*
484 * Return @blk's DriveInfo if any, else null.
485 */
486 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
487 {
488 return blk->legacy_dinfo;
489 }
490
491 /*
492 * Set @blk's DriveInfo to @dinfo, and return it.
493 * @blk must not have a DriveInfo set already.
494 * No other BlockBackend may have the same DriveInfo set.
495 */
496 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
497 {
498 assert(!blk->legacy_dinfo);
499 return blk->legacy_dinfo = dinfo;
500 }
501
502 /*
503 * Return the BlockBackend with DriveInfo @dinfo.
504 * It must exist.
505 */
506 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
507 {
508 BlockBackend *blk = NULL;
509
510 while ((blk = blk_next(blk)) != NULL) {
511 if (blk->legacy_dinfo == dinfo) {
512 return blk;
513 }
514 }
515 abort();
516 }
517
518 /*
519 * Returns a pointer to the publicly accessible fields of @blk.
520 */
521 BlockBackendPublic *blk_get_public(BlockBackend *blk)
522 {
523 return &blk->public;
524 }
525
526 /*
527 * Returns a BlockBackend given the associated @public fields.
528 */
529 BlockBackend *blk_by_public(BlockBackendPublic *public)
530 {
531 return container_of(public, BlockBackend, public);
532 }
533
534 /*
535 * Disassociates the currently associated BlockDriverState from @blk.
536 */
537 void blk_remove_bs(BlockBackend *blk)
538 {
539 notifier_list_notify(&blk->remove_bs_notifiers, blk);
540 if (blk->public.throttle_state) {
541 throttle_timers_detach_aio_context(&blk->public.throttle_timers);
542 }
543
544 blk_update_root_state(blk);
545
546 bdrv_root_unref_child(blk->root);
547 blk->root = NULL;
548 }
549
550 /*
551 * Associates a new BlockDriverState with @blk.
552 */
553 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
554 {
555 blk->root = bdrv_root_attach_child(bs, "root", &child_root,
556 blk->perm, blk->shared_perm, blk, errp);
557 if (blk->root == NULL) {
558 return -EPERM;
559 }
560 bdrv_ref(bs);
561
562 notifier_list_notify(&blk->insert_bs_notifiers, blk);
563 if (blk->public.throttle_state) {
564 throttle_timers_attach_aio_context(
565 &blk->public.throttle_timers, bdrv_get_aio_context(bs));
566 }
567
568 return 0;
569 }
570
571 /*
572 * Sets the permission bitmasks that the user of the BlockBackend needs.
573 */
574 int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
575 Error **errp)
576 {
577 int ret;
578
579 if (blk->root) {
580 ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
581 if (ret < 0) {
582 return ret;
583 }
584 }
585
586 blk->perm = perm;
587 blk->shared_perm = shared_perm;
588
589 return 0;
590 }
591
592 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
593 {
594 *perm = blk->perm;
595 *shared_perm = blk->shared_perm;
596 }
597
598 static int blk_do_attach_dev(BlockBackend *blk, void *dev)
599 {
600 if (blk->dev) {
601 return -EBUSY;
602 }
603 blk_ref(blk);
604 blk->dev = dev;
605 blk->legacy_dev = false;
606 blk_iostatus_reset(blk);
607 return 0;
608 }
609
610 /*
611 * Attach device model @dev to @blk.
612 * Return 0 on success, -EBUSY when a device model is attached already.
613 */
614 int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
615 {
616 return blk_do_attach_dev(blk, dev);
617 }
618
619 /*
620 * Attach device model @dev to @blk.
621 * @blk must not have a device model attached already.
622 * TODO qdevified devices don't use this, remove when devices are qdevified
623 */
624 void blk_attach_dev_legacy(BlockBackend *blk, void *dev)
625 {
626 if (blk_do_attach_dev(blk, dev) < 0) {
627 abort();
628 }
629 blk->legacy_dev = true;
630 }
631
632 /*
633 * Detach device model @dev from @blk.
634 * @dev must be currently attached to @blk.
635 */
636 void blk_detach_dev(BlockBackend *blk, void *dev)
637 /* TODO change to DeviceState *dev when all users are qdevified */
638 {
639 assert(blk->dev == dev);
640 blk->dev = NULL;
641 blk->dev_ops = NULL;
642 blk->dev_opaque = NULL;
643 blk->guest_block_size = 512;
644 blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
645 blk_unref(blk);
646 }
647
648 /*
649 * Return the device model attached to @blk if any, else null.
650 */
651 void *blk_get_attached_dev(BlockBackend *blk)
652 /* TODO change to return DeviceState * when all users are qdevified */
653 {
654 return blk->dev;
655 }
656
657 /* Return the qdev ID, or if no ID is assigned the QOM path, of the block
658 * device attached to the BlockBackend. */
659 static char *blk_get_attached_dev_id(BlockBackend *blk)
660 {
661 DeviceState *dev;
662
663 assert(!blk->legacy_dev);
664 dev = blk->dev;
665
666 if (!dev) {
667 return g_strdup("");
668 } else if (dev->id) {
669 return g_strdup(dev->id);
670 }
671 return object_get_canonical_path(OBJECT(dev));
672 }
673
674 /*
675 * Return the BlockBackend which has the device model @dev attached if it
676 * exists, else null.
677 *
678 * @dev must not be null.
679 */
680 BlockBackend *blk_by_dev(void *dev)
681 {
682 BlockBackend *blk = NULL;
683
684 assert(dev != NULL);
685 while ((blk = blk_all_next(blk)) != NULL) {
686 if (blk->dev == dev) {
687 return blk;
688 }
689 }
690 return NULL;
691 }
692
693 /*
694 * Set @blk's device model callbacks to @ops.
695 * @opaque is the opaque argument to pass to the callbacks.
696 * This is for use by device models.
697 */
698 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
699 void *opaque)
700 {
701 /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep
702 * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops
703 * is set. */
704 assert(!blk->legacy_dev);
705
706 blk->dev_ops = ops;
707 blk->dev_opaque = opaque;
708 }
709
710 /*
711 * Notify @blk's attached device model of media change.
712 *
713 * If @load is true, notify of media load. This action can fail, meaning that
714 * the medium cannot be loaded. @errp is set then.
715 *
716 * If @load is false, notify of media eject. This can never fail.
717 *
718 * Also send DEVICE_TRAY_MOVED events as appropriate.
719 */
720 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
721 {
722 if (blk->dev_ops && blk->dev_ops->change_media_cb) {
723 bool tray_was_open, tray_is_open;
724 Error *local_err = NULL;
725
726 assert(!blk->legacy_dev);
727
728 tray_was_open = blk_dev_is_tray_open(blk);
729 blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
730 if (local_err) {
731 assert(load == true);
732 error_propagate(errp, local_err);
733 return;
734 }
735 tray_is_open = blk_dev_is_tray_open(blk);
736
737 if (tray_was_open != tray_is_open) {
738 char *id = blk_get_attached_dev_id(blk);
739 qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open,
740 &error_abort);
741 g_free(id);
742 }
743 }
744 }
745
746 static void blk_root_change_media(BdrvChild *child, bool load)
747 {
748 blk_dev_change_media_cb(child->opaque, load, NULL);
749 }
750
751 /*
752 * Does @blk's attached device model have removable media?
753 * %true if no device model is attached.
754 */
755 bool blk_dev_has_removable_media(BlockBackend *blk)
756 {
757 return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
758 }
759
760 /*
761 * Does @blk's attached device model have a tray?
762 */
763 bool blk_dev_has_tray(BlockBackend *blk)
764 {
765 return blk->dev_ops && blk->dev_ops->is_tray_open;
766 }
767
768 /*
769 * Notify @blk's attached device model of a media eject request.
770 * If @force is true, the medium is about to be yanked out forcefully.
771 */
772 void blk_dev_eject_request(BlockBackend *blk, bool force)
773 {
774 if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
775 blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
776 }
777 }
778
779 /*
780 * Does @blk's attached device model have a tray, and is it open?
781 */
782 bool blk_dev_is_tray_open(BlockBackend *blk)
783 {
784 if (blk_dev_has_tray(blk)) {
785 return blk->dev_ops->is_tray_open(blk->dev_opaque);
786 }
787 return false;
788 }
789
790 /*
791 * Does @blk's attached device model have the medium locked?
792 * %false if the device model has no such lock.
793 */
794 bool blk_dev_is_medium_locked(BlockBackend *blk)
795 {
796 if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
797 return blk->dev_ops->is_medium_locked(blk->dev_opaque);
798 }
799 return false;
800 }
801
802 /*
803 * Notify @blk's attached device model of a backend size change.
804 */
805 static void blk_root_resize(BdrvChild *child)
806 {
807 BlockBackend *blk = child->opaque;
808
809 if (blk->dev_ops && blk->dev_ops->resize_cb) {
810 blk->dev_ops->resize_cb(blk->dev_opaque);
811 }
812 }
813
814 void blk_iostatus_enable(BlockBackend *blk)
815 {
816 blk->iostatus_enabled = true;
817 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
818 }
819
820 /* The I/O status is only enabled if the drive explicitly
821 * enables it _and_ the VM is configured to stop on errors */
822 bool blk_iostatus_is_enabled(const BlockBackend *blk)
823 {
824 return (blk->iostatus_enabled &&
825 (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
826 blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
827 blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
828 }
829
830 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
831 {
832 return blk->iostatus;
833 }
834
835 void blk_iostatus_disable(BlockBackend *blk)
836 {
837 blk->iostatus_enabled = false;
838 }
839
840 void blk_iostatus_reset(BlockBackend *blk)
841 {
842 if (blk_iostatus_is_enabled(blk)) {
843 BlockDriverState *bs = blk_bs(blk);
844 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
845 if (bs && bs->job) {
846 block_job_iostatus_reset(bs->job);
847 }
848 }
849 }
850
851 void blk_iostatus_set_err(BlockBackend *blk, int error)
852 {
853 assert(blk_iostatus_is_enabled(blk));
854 if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
855 blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
856 BLOCK_DEVICE_IO_STATUS_FAILED;
857 }
858 }
859
860 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
861 {
862 blk->allow_write_beyond_eof = allow;
863 }
864
865 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
866 size_t size)
867 {
868 int64_t len;
869
870 if (size > INT_MAX) {
871 return -EIO;
872 }
873
874 if (!blk_is_available(blk)) {
875 return -ENOMEDIUM;
876 }
877
878 if (offset < 0) {
879 return -EIO;
880 }
881
882 if (!blk->allow_write_beyond_eof) {
883 len = blk_getlength(blk);
884 if (len < 0) {
885 return len;
886 }
887
888 if (offset > len || len - offset < size) {
889 return -EIO;
890 }
891 }
892
893 return 0;
894 }
895
896 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
897 unsigned int bytes, QEMUIOVector *qiov,
898 BdrvRequestFlags flags)
899 {
900 int ret;
901 BlockDriverState *bs = blk_bs(blk);
902
903 trace_blk_co_preadv(blk, bs, offset, bytes, flags);
904
905 ret = blk_check_byte_request(blk, offset, bytes);
906 if (ret < 0) {
907 return ret;
908 }
909
910 bdrv_inc_in_flight(bs);
911
912 /* throttling disk I/O */
913 if (blk->public.throttle_state) {
914 throttle_group_co_io_limits_intercept(blk, bytes, false);
915 }
916
917 ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
918 bdrv_dec_in_flight(bs);
919 return ret;
920 }
921
922 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
923 unsigned int bytes, QEMUIOVector *qiov,
924 BdrvRequestFlags flags)
925 {
926 int ret;
927 BlockDriverState *bs = blk_bs(blk);
928
929 trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
930
931 ret = blk_check_byte_request(blk, offset, bytes);
932 if (ret < 0) {
933 return ret;
934 }
935
936 bdrv_inc_in_flight(bs);
937
938 /* throttling disk I/O */
939 if (blk->public.throttle_state) {
940 throttle_group_co_io_limits_intercept(blk, bytes, true);
941 }
942
943 if (!blk->enable_write_cache) {
944 flags |= BDRV_REQ_FUA;
945 }
946
947 ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
948 bdrv_dec_in_flight(bs);
949 return ret;
950 }
951
952 typedef struct BlkRwCo {
953 BlockBackend *blk;
954 int64_t offset;
955 QEMUIOVector *qiov;
956 int ret;
957 BdrvRequestFlags flags;
958 } BlkRwCo;
959
960 static void blk_read_entry(void *opaque)
961 {
962 BlkRwCo *rwco = opaque;
963
964 rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
965 rwco->qiov, rwco->flags);
966 }
967
968 static void blk_write_entry(void *opaque)
969 {
970 BlkRwCo *rwco = opaque;
971
972 rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
973 rwco->qiov, rwco->flags);
974 }
975
976 static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
977 int64_t bytes, CoroutineEntry co_entry,
978 BdrvRequestFlags flags)
979 {
980 QEMUIOVector qiov;
981 struct iovec iov;
982 BlkRwCo rwco;
983
984 iov = (struct iovec) {
985 .iov_base = buf,
986 .iov_len = bytes,
987 };
988 qemu_iovec_init_external(&qiov, &iov, 1);
989
990 rwco = (BlkRwCo) {
991 .blk = blk,
992 .offset = offset,
993 .qiov = &qiov,
994 .flags = flags,
995 .ret = NOT_DONE,
996 };
997
998 if (qemu_in_coroutine()) {
999 /* Fast-path if already in coroutine context */
1000 co_entry(&rwco);
1001 } else {
1002 Coroutine *co = qemu_coroutine_create(co_entry, &rwco);
1003 qemu_coroutine_enter(co);
1004 BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
1005 }
1006
1007 return rwco.ret;
1008 }
1009
1010 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
1011 int count)
1012 {
1013 int ret;
1014
1015 ret = blk_check_byte_request(blk, offset, count);
1016 if (ret < 0) {
1017 return ret;
1018 }
1019
1020 blk_root_drained_begin(blk->root);
1021 ret = blk_pread(blk, offset, buf, count);
1022 blk_root_drained_end(blk->root);
1023 return ret;
1024 }
1025
1026 int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1027 int count, BdrvRequestFlags flags)
1028 {
1029 return blk_prw(blk, offset, NULL, count, blk_write_entry,
1030 flags | BDRV_REQ_ZERO_WRITE);
1031 }
1032
1033 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
1034 {
1035 return bdrv_make_zero(blk->root, flags);
1036 }
1037
1038 static void error_callback_bh(void *opaque)
1039 {
1040 struct BlockBackendAIOCB *acb = opaque;
1041
1042 bdrv_dec_in_flight(acb->common.bs);
1043 acb->common.cb(acb->common.opaque, acb->ret);
1044 qemu_aio_unref(acb);
1045 }
1046
1047 BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
1048 BlockCompletionFunc *cb,
1049 void *opaque, int ret)
1050 {
1051 struct BlockBackendAIOCB *acb;
1052
1053 bdrv_inc_in_flight(blk_bs(blk));
1054 acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
1055 acb->blk = blk;
1056 acb->ret = ret;
1057
1058 aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
1059 return &acb->common;
1060 }
1061
1062 typedef struct BlkAioEmAIOCB {
1063 BlockAIOCB common;
1064 BlkRwCo rwco;
1065 int bytes;
1066 bool has_returned;
1067 } BlkAioEmAIOCB;
1068
1069 static const AIOCBInfo blk_aio_em_aiocb_info = {
1070 .aiocb_size = sizeof(BlkAioEmAIOCB),
1071 };
1072
1073 static void blk_aio_complete(BlkAioEmAIOCB *acb)
1074 {
1075 if (acb->has_returned) {
1076 bdrv_dec_in_flight(acb->common.bs);
1077 acb->common.cb(acb->common.opaque, acb->rwco.ret);
1078 qemu_aio_unref(acb);
1079 }
1080 }
1081
1082 static void blk_aio_complete_bh(void *opaque)
1083 {
1084 BlkAioEmAIOCB *acb = opaque;
1085 assert(acb->has_returned);
1086 blk_aio_complete(acb);
1087 }
1088
1089 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
1090 QEMUIOVector *qiov, CoroutineEntry co_entry,
1091 BdrvRequestFlags flags,
1092 BlockCompletionFunc *cb, void *opaque)
1093 {
1094 BlkAioEmAIOCB *acb;
1095 Coroutine *co;
1096
1097 bdrv_inc_in_flight(blk_bs(blk));
1098 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1099 acb->rwco = (BlkRwCo) {
1100 .blk = blk,
1101 .offset = offset,
1102 .qiov = qiov,
1103 .flags = flags,
1104 .ret = NOT_DONE,
1105 };
1106 acb->bytes = bytes;
1107 acb->has_returned = false;
1108
1109 co = qemu_coroutine_create(co_entry, acb);
1110 qemu_coroutine_enter(co);
1111
1112 acb->has_returned = true;
1113 if (acb->rwco.ret != NOT_DONE) {
1114 aio_bh_schedule_oneshot(blk_get_aio_context(blk),
1115 blk_aio_complete_bh, acb);
1116 }
1117
1118 return &acb->common;
1119 }
1120
1121 static void blk_aio_read_entry(void *opaque)
1122 {
1123 BlkAioEmAIOCB *acb = opaque;
1124 BlkRwCo *rwco = &acb->rwco;
1125
1126 assert(rwco->qiov->size == acb->bytes);
1127 rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
1128 rwco->qiov, rwco->flags);
1129 blk_aio_complete(acb);
1130 }
1131
1132 static void blk_aio_write_entry(void *opaque)
1133 {
1134 BlkAioEmAIOCB *acb = opaque;
1135 BlkRwCo *rwco = &acb->rwco;
1136
1137 assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
1138 rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
1139 rwco->qiov, rwco->flags);
1140 blk_aio_complete(acb);
1141 }
1142
1143 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1144 int count, BdrvRequestFlags flags,
1145 BlockCompletionFunc *cb, void *opaque)
1146 {
1147 return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
1148 flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
1149 }
1150
1151 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
1152 {
1153 int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
1154 if (ret < 0) {
1155 return ret;
1156 }
1157 return count;
1158 }
1159
1160 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
1161 BdrvRequestFlags flags)
1162 {
1163 int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
1164 flags);
1165 if (ret < 0) {
1166 return ret;
1167 }
1168 return count;
1169 }
1170
1171 int64_t blk_getlength(BlockBackend *blk)
1172 {
1173 if (!blk_is_available(blk)) {
1174 return -ENOMEDIUM;
1175 }
1176
1177 return bdrv_getlength(blk_bs(blk));
1178 }
1179
1180 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
1181 {
1182 if (!blk_bs(blk)) {
1183 *nb_sectors_ptr = 0;
1184 } else {
1185 bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
1186 }
1187 }
1188
1189 int64_t blk_nb_sectors(BlockBackend *blk)
1190 {
1191 if (!blk_is_available(blk)) {
1192 return -ENOMEDIUM;
1193 }
1194
1195 return bdrv_nb_sectors(blk_bs(blk));
1196 }
1197
1198 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
1199 QEMUIOVector *qiov, BdrvRequestFlags flags,
1200 BlockCompletionFunc *cb, void *opaque)
1201 {
1202 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1203 blk_aio_read_entry, flags, cb, opaque);
1204 }
1205
1206 BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
1207 QEMUIOVector *qiov, BdrvRequestFlags flags,
1208 BlockCompletionFunc *cb, void *opaque)
1209 {
1210 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1211 blk_aio_write_entry, flags, cb, opaque);
1212 }
1213
1214 static void blk_aio_flush_entry(void *opaque)
1215 {
1216 BlkAioEmAIOCB *acb = opaque;
1217 BlkRwCo *rwco = &acb->rwco;
1218
1219 rwco->ret = blk_co_flush(rwco->blk);
1220 blk_aio_complete(acb);
1221 }
1222
/*
 * Submit an asynchronous flush.  The request is routed through
 * blk_aio_prwv() with zero offset and length and no data vector;
 * blk_aio_flush_entry() does the actual work.
 */
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
                          BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
}
1228
1229 static void blk_aio_pdiscard_entry(void *opaque)
1230 {
1231 BlkAioEmAIOCB *acb = opaque;
1232 BlkRwCo *rwco = &acb->rwco;
1233
1234 rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
1235 blk_aio_complete(acb);
1236 }
1237
/*
 * Submit an asynchronous discard of @count bytes at @offset.  No data
 * vector is involved; blk_aio_pdiscard_entry() performs the discard.
 */
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
                             int64_t offset, int count,
                             BlockCompletionFunc *cb, void *opaque)
{
    return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0,
                        cb, opaque);
}
1245
/* Cancel @acb; thin wrapper around bdrv_aio_cancel(). */
void blk_aio_cancel(BlockAIOCB *acb)
{
    bdrv_aio_cancel(acb);
}
1250
/* Cancel @acb; thin wrapper around bdrv_aio_cancel_async(). */
void blk_aio_cancel_async(BlockAIOCB *acb)
{
    bdrv_aio_cancel_async(acb);
}
1255
1256 int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1257 {
1258 if (!blk_is_available(blk)) {
1259 return -ENOMEDIUM;
1260 }
1261
1262 return bdrv_co_ioctl(blk_bs(blk), req, buf);
1263 }
1264
1265 static void blk_ioctl_entry(void *opaque)
1266 {
1267 BlkRwCo *rwco = opaque;
1268 rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
1269 rwco->qiov->iov[0].iov_base);
1270 }
1271
/*
 * Synchronous ioctl.  @req is passed in blk_prw()'s offset argument and
 * @buf in its buffer argument with a length of 0; blk_ioctl_entry() reads
 * them back from the request.
 */
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
    return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0);
}
1276
1277 static void blk_aio_ioctl_entry(void *opaque)
1278 {
1279 BlkAioEmAIOCB *acb = opaque;
1280 BlkRwCo *rwco = &acb->rwco;
1281
1282 rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
1283 rwco->qiov->iov[0].iov_base);
1284 blk_aio_complete(acb);
1285 }
1286
1287 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
1288 BlockCompletionFunc *cb, void *opaque)
1289 {
1290 QEMUIOVector qiov;
1291 struct iovec iov;
1292
1293 iov = (struct iovec) {
1294 .iov_base = buf,
1295 .iov_len = 0,
1296 };
1297 qemu_iovec_init_external(&qiov, &iov, 1);
1298
1299 return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
1300 }
1301
1302 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
1303 {
1304 int ret = blk_check_byte_request(blk, offset, count);
1305 if (ret < 0) {
1306 return ret;
1307 }
1308
1309 return bdrv_co_pdiscard(blk_bs(blk), offset, count);
1310 }
1311
1312 int blk_co_flush(BlockBackend *blk)
1313 {
1314 if (!blk_is_available(blk)) {
1315 return -ENOMEDIUM;
1316 }
1317
1318 return bdrv_co_flush(blk_bs(blk));
1319 }
1320
1321 static void blk_flush_entry(void *opaque)
1322 {
1323 BlkRwCo *rwco = opaque;
1324 rwco->ret = blk_co_flush(rwco->blk);
1325 }
1326
/*
 * Synchronous flush.  Routed through blk_prw() with zero offset and
 * length and no buffer; blk_flush_entry() does the actual work.
 */
int blk_flush(BlockBackend *blk)
{
    return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0);
}
1331
1332 void blk_drain(BlockBackend *blk)
1333 {
1334 if (blk_bs(blk)) {
1335 bdrv_drain(blk_bs(blk));
1336 }
1337 }
1338
/* Thin wrapper around bdrv_drain_all(). */
void blk_drain_all(void)
{
    bdrv_drain_all();
}
1343
/* Record the error policies of @blk for read and write errors. */
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                      BlockdevOnError on_write_error)
{
    blk->on_read_error = on_read_error;
    blk->on_write_error = on_write_error;
}
1350
1351 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
1352 {
1353 return is_read ? blk->on_read_error : blk->on_write_error;
1354 }
1355
1356 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
1357 int error)
1358 {
1359 BlockdevOnError on_err = blk_get_on_error(blk, is_read);
1360
1361 switch (on_err) {
1362 case BLOCKDEV_ON_ERROR_ENOSPC:
1363 return (error == ENOSPC) ?
1364 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
1365 case BLOCKDEV_ON_ERROR_STOP:
1366 return BLOCK_ERROR_ACTION_STOP;
1367 case BLOCKDEV_ON_ERROR_REPORT:
1368 return BLOCK_ERROR_ACTION_REPORT;
1369 case BLOCKDEV_ON_ERROR_IGNORE:
1370 return BLOCK_ERROR_ACTION_IGNORE;
1371 case BLOCKDEV_ON_ERROR_AUTO:
1372 default:
1373 abort();
1374 }
1375 }
1376
1377 static void send_qmp_error_event(BlockBackend *blk,
1378 BlockErrorAction action,
1379 bool is_read, int error)
1380 {
1381 IoOperationType optype;
1382
1383 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
1384 qapi_event_send_block_io_error(blk_name(blk),
1385 bdrv_get_node_name(blk_bs(blk)), optype,
1386 action, blk_iostatus_is_enabled(blk),
1387 error == ENOSPC, strerror(error),
1388 &error_abort);
1389 }
1390
1391 /* This is done by device models because, while the block layer knows
1392 * about the error, it does not know whether an operation comes from
1393 * the device or the block layer (from a job, for example).
1394 */
1395 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
1396 bool is_read, int error)
1397 {
1398 assert(error >= 0);
1399
1400 if (action == BLOCK_ERROR_ACTION_STOP) {
1401 /* First set the iostatus, so that "info block" returns an iostatus
1402 * that matches the events raised so far (an additional error iostatus
1403 * is fine, but not a lost one).
1404 */
1405 blk_iostatus_set_err(blk, error);
1406
1407 /* Then raise the request to stop the VM and the event.
1408 * qemu_system_vmstop_request_prepare has two effects. First,
1409 * it ensures that the STOP event always comes after the
1410 * BLOCK_IO_ERROR event. Second, it ensures that even if management
1411 * can observe the STOP event and do a "cont" before the STOP
1412 * event is issued, the VM will not stop. In this case, vm_start()
1413 * also ensures that the STOP/RESUME pair of events is emitted.
1414 */
1415 qemu_system_vmstop_request_prepare();
1416 send_qmp_error_event(blk, action, is_read, error);
1417 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
1418 } else {
1419 send_qmp_error_event(blk, action, is_read, error);
1420 }
1421 }
1422
1423 int blk_is_read_only(BlockBackend *blk)
1424 {
1425 BlockDriverState *bs = blk_bs(blk);
1426
1427 if (bs) {
1428 return bdrv_is_read_only(bs);
1429 } else {
1430 return blk->root_state.read_only;
1431 }
1432 }
1433
1434 int blk_is_sg(BlockBackend *blk)
1435 {
1436 BlockDriverState *bs = blk_bs(blk);
1437
1438 if (!bs) {
1439 return 0;
1440 }
1441
1442 return bdrv_is_sg(bs);
1443 }
1444
/* Returns the write cache setting stored in @blk. */
int blk_enable_write_cache(BlockBackend *blk)
{
    return blk->enable_write_cache;
}
1449
/* Store @wce as the write cache setting of @blk. */
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
    blk->enable_write_cache = wce;
}
1454
1455 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
1456 {
1457 BlockDriverState *bs = blk_bs(blk);
1458
1459 if (!bs) {
1460 error_setg(errp, "Device '%s' has no medium", blk->name);
1461 return;
1462 }
1463
1464 bdrv_invalidate_cache(bs, errp);
1465 }
1466
1467 bool blk_is_inserted(BlockBackend *blk)
1468 {
1469 BlockDriverState *bs = blk_bs(blk);
1470
1471 return bs && bdrv_is_inserted(bs);
1472 }
1473
1474 bool blk_is_available(BlockBackend *blk)
1475 {
1476 return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
1477 }
1478
1479 void blk_lock_medium(BlockBackend *blk, bool locked)
1480 {
1481 BlockDriverState *bs = blk_bs(blk);
1482
1483 if (bs) {
1484 bdrv_lock_medium(bs, locked);
1485 }
1486 }
1487
1488 void blk_eject(BlockBackend *blk, bool eject_flag)
1489 {
1490 BlockDriverState *bs = blk_bs(blk);
1491 char *id;
1492
1493 /* blk_eject is only called by qdevified devices */
1494 assert(!blk->legacy_dev);
1495
1496 if (bs) {
1497 bdrv_eject(bs, eject_flag);
1498 }
1499
1500 /* Whether or not we ejected on the backend,
1501 * the frontend experienced a tray event. */
1502 id = blk_get_attached_dev_id(blk);
1503 qapi_event_send_device_tray_moved(blk_name(blk), id,
1504 eject_flag, &error_abort);
1505 g_free(id);
1506 }
1507
1508 int blk_get_flags(BlockBackend *blk)
1509 {
1510 BlockDriverState *bs = blk_bs(blk);
1511
1512 if (bs) {
1513 return bdrv_get_flags(bs);
1514 } else {
1515 return blk->root_state.open_flags;
1516 }
1517 }
1518
1519 /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
1520 uint32_t blk_get_max_transfer(BlockBackend *blk)
1521 {
1522 BlockDriverState *bs = blk_bs(blk);
1523 uint32_t max = 0;
1524
1525 if (bs) {
1526 max = bs->bl.max_transfer;
1527 }
1528 return MIN_NON_ZERO(max, INT_MAX);
1529 }
1530
1531 int blk_get_max_iov(BlockBackend *blk)
1532 {
1533 return blk->root->bs->bl.max_iov;
1534 }
1535
/* Store @align as the guest block size of @blk. */
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
    blk->guest_block_size = align;
}
1540
1541 void *blk_try_blockalign(BlockBackend *blk, size_t size)
1542 {
1543 return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
1544 }
1545
1546 void *blk_blockalign(BlockBackend *blk, size_t size)
1547 {
1548 return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
1549 }
1550
1551 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
1552 {
1553 BlockDriverState *bs = blk_bs(blk);
1554
1555 if (!bs) {
1556 return false;
1557 }
1558
1559 return bdrv_op_is_blocked(bs, op, errp);
1560 }
1561
1562 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
1563 {
1564 BlockDriverState *bs = blk_bs(blk);
1565
1566 if (bs) {
1567 bdrv_op_unblock(bs, op, reason);
1568 }
1569 }
1570
1571 void blk_op_block_all(BlockBackend *blk, Error *reason)
1572 {
1573 BlockDriverState *bs = blk_bs(blk);
1574
1575 if (bs) {
1576 bdrv_op_block_all(bs, reason);
1577 }
1578 }
1579
1580 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
1581 {
1582 BlockDriverState *bs = blk_bs(blk);
1583
1584 if (bs) {
1585 bdrv_op_unblock_all(bs, reason);
1586 }
1587 }
1588
1589 AioContext *blk_get_aio_context(BlockBackend *blk)
1590 {
1591 BlockDriverState *bs = blk_bs(blk);
1592
1593 if (bs) {
1594 return bdrv_get_aio_context(bs);
1595 } else {
1596 return qemu_get_aio_context();
1597 }
1598 }
1599
1600 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
1601 {
1602 BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
1603 return blk_get_aio_context(blk_acb->blk);
1604 }
1605
1606 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
1607 {
1608 BlockDriverState *bs = blk_bs(blk);
1609
1610 if (bs) {
1611 if (blk->public.throttle_state) {
1612 throttle_timers_detach_aio_context(&blk->public.throttle_timers);
1613 }
1614 bdrv_set_aio_context(bs, new_context);
1615 if (blk->public.throttle_state) {
1616 throttle_timers_attach_aio_context(&blk->public.throttle_timers,
1617 new_context);
1618 }
1619 }
1620 }
1621
1622 void blk_add_aio_context_notifier(BlockBackend *blk,
1623 void (*attached_aio_context)(AioContext *new_context, void *opaque),
1624 void (*detach_aio_context)(void *opaque), void *opaque)
1625 {
1626 BlockDriverState *bs = blk_bs(blk);
1627
1628 if (bs) {
1629 bdrv_add_aio_context_notifier(bs, attached_aio_context,
1630 detach_aio_context, opaque);
1631 }
1632 }
1633
1634 void blk_remove_aio_context_notifier(BlockBackend *blk,
1635 void (*attached_aio_context)(AioContext *,
1636 void *),
1637 void (*detach_aio_context)(void *),
1638 void *opaque)
1639 {
1640 BlockDriverState *bs = blk_bs(blk);
1641
1642 if (bs) {
1643 bdrv_remove_aio_context_notifier(bs, attached_aio_context,
1644 detach_aio_context, opaque);
1645 }
1646 }
1647
/* Add @notify to the remove_bs_notifiers list of @blk. */
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->remove_bs_notifiers, notify);
}
1652
/* Add @notify to the insert_bs_notifiers list of @blk. */
void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
{
    notifier_list_add(&blk->insert_bs_notifiers, notify);
}
1657
1658 void blk_io_plug(BlockBackend *blk)
1659 {
1660 BlockDriverState *bs = blk_bs(blk);
1661
1662 if (bs) {
1663 bdrv_io_plug(bs);
1664 }
1665 }
1666
1667 void blk_io_unplug(BlockBackend *blk)
1668 {
1669 BlockDriverState *bs = blk_bs(blk);
1670
1671 if (bs) {
1672 bdrv_io_unplug(bs);
1673 }
1674 }
1675
/* Returns a pointer to the I/O statistics embedded in @blk. */
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
    return &blk->stats;
}
1680
1681 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
1682 BlockCompletionFunc *cb, void *opaque)
1683 {
1684 return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
1685 }
1686
1687 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1688 int count, BdrvRequestFlags flags)
1689 {
1690 return blk_co_pwritev(blk, offset, count, NULL,
1691 flags | BDRV_REQ_ZERO_WRITE);
1692 }
1693
/*
 * Synchronously write @count bytes from @buf at @offset with
 * BDRV_REQ_WRITE_COMPRESSED set.  Returns the result of blk_prw()
 * unchanged.
 */
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
                          int count)
{
    return blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
                   BDRV_REQ_WRITE_COMPRESSED);
}
1700
1701 int blk_truncate(BlockBackend *blk, int64_t offset)
1702 {
1703 if (!blk_is_available(blk)) {
1704 return -ENOMEDIUM;
1705 }
1706
1707 return bdrv_truncate(blk->root, offset);
1708 }
1709
1710 static void blk_pdiscard_entry(void *opaque)
1711 {
1712 BlkRwCo *rwco = opaque;
1713 rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
1714 }
1715
/*
 * Synchronously discard @count bytes at @offset.  Routed through
 * blk_prw() with no buffer; blk_pdiscard_entry() does the actual work.
 */
int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
    return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0);
}
1720
1721 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
1722 int64_t pos, int size)
1723 {
1724 int ret;
1725
1726 if (!blk_is_available(blk)) {
1727 return -ENOMEDIUM;
1728 }
1729
1730 ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
1731 if (ret < 0) {
1732 return ret;
1733 }
1734
1735 if (ret == size && !blk->enable_write_cache) {
1736 ret = bdrv_flush(blk_bs(blk));
1737 }
1738
1739 return ret < 0 ? ret : size;
1740 }
1741
1742 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
1743 {
1744 if (!blk_is_available(blk)) {
1745 return -ENOMEDIUM;
1746 }
1747
1748 return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
1749 }
1750
1751 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
1752 {
1753 if (!blk_is_available(blk)) {
1754 return -ENOMEDIUM;
1755 }
1756
1757 return bdrv_probe_blocksizes(blk_bs(blk), bsz);
1758 }
1759
1760 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
1761 {
1762 if (!blk_is_available(blk)) {
1763 return -ENOMEDIUM;
1764 }
1765
1766 return bdrv_probe_geometry(blk_bs(blk), geo);
1767 }
1768
1769 /*
1770 * Updates the BlockBackendRootState object with data from the currently
1771 * attached BlockDriverState.
1772 */
1773 void blk_update_root_state(BlockBackend *blk)
1774 {
1775 assert(blk->root);
1776
1777 blk->root_state.open_flags = blk->root->bs->open_flags;
1778 blk->root_state.read_only = blk->root->bs->read_only;
1779 blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
1780 }
1781
/*
 * Returns the detect-zeroes setting to be used for bdrv_open() of a
 * BlockDriverState which is supposed to inherit the root state.  This is
 * the value saved in blk->root_state.
 */
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
    return blk->root_state.detect_zeroes;
}
1790
1791 /*
1792 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
1793 * supposed to inherit the root state.
1794 */
1795 int blk_get_open_flags_from_root_state(BlockBackend *blk)
1796 {
1797 int bs_flags;
1798
1799 bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
1800 bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
1801
1802 return bs_flags;
1803 }
1804
/* Returns a pointer to the root state embedded in @blk. */
BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
    return &blk->root_state;
}
1809
1810 int blk_commit_all(void)
1811 {
1812 BlockBackend *blk = NULL;
1813
1814 while ((blk = blk_all_next(blk)) != NULL) {
1815 AioContext *aio_context = blk_get_aio_context(blk);
1816
1817 aio_context_acquire(aio_context);
1818 if (blk_is_inserted(blk) && blk->root->bs->backing) {
1819 int ret = bdrv_commit(blk->root->bs);
1820 if (ret < 0) {
1821 aio_context_release(aio_context);
1822 return ret;
1823 }
1824 }
1825 aio_context_release(aio_context);
1826 }
1827 return 0;
1828 }
1829
1830
1831 /* throttling disk I/O limits */
/* Apply throttle configuration @cfg to @blk via throttle_group_config(). */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
{
    throttle_group_config(blk, cfg);
}
1836
1837 void blk_io_limits_disable(BlockBackend *blk)
1838 {
1839 assert(blk->public.throttle_state);
1840 bdrv_drained_begin(blk_bs(blk));
1841 throttle_group_unregister_blk(blk);
1842 bdrv_drained_end(blk_bs(blk));
1843 }
1844
/*
 * Register @blk with the throttle group named @group.  I/O limits must not
 * already be enabled for @blk (asserted).
 *
 * Should be called before blk_set_io_limits() if a limit is set.
 */
void blk_io_limits_enable(BlockBackend *blk, const char *group)
{
    assert(!blk->public.throttle_state);
    throttle_group_register_blk(blk, group);
}
1851
1852 void blk_io_limits_update_group(BlockBackend *blk, const char *group)
1853 {
1854 /* this BB is not part of any group */
1855 if (!blk->public.throttle_state) {
1856 return;
1857 }
1858
1859 /* this BB is a part of the same group than the one we want */
1860 if (!g_strcmp0(throttle_group_get_name(blk), group)) {
1861 return;
1862 }
1863
1864 /* need to change the group this bs belong to */
1865 blk_io_limits_disable(blk);
1866 blk_io_limits_enable(blk, group);
1867 }
1868
1869 static void blk_root_drained_begin(BdrvChild *child)
1870 {
1871 BlockBackend *blk = child->opaque;
1872
1873 /* Note that blk->root may not be accessible here yet if we are just
1874 * attaching to a BlockDriverState that is drained. Use child instead. */
1875
1876 if (blk->public.io_limits_disabled++ == 0) {
1877 throttle_group_restart_blk(blk);
1878 }
1879 }
1880
1881 static void blk_root_drained_end(BdrvChild *child)
1882 {
1883 BlockBackend *blk = child->opaque;
1884
1885 assert(blk->public.io_limits_disabled);
1886 --blk->public.io_limits_disabled;
1887 }