block: Drain throttling queue with BdrvChild callback
[qemu.git] / block / block-backend.c
1 /*
2 * QEMU Block backends
3 *
4 * Copyright (C) 2014-2016 Red Hat, Inc.
5 *
6 * Authors:
7 * Markus Armbruster <armbru@redhat.com>,
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1
10 * or later. See the COPYING.LIB file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "sysemu/block-backend.h"
15 #include "block/block_int.h"
16 #include "block/blockjob.h"
17 #include "block/throttle-groups.h"
18 #include "sysemu/blockdev.h"
19 #include "sysemu/sysemu.h"
20 #include "qapi-event.h"
21 #include "qemu/id.h"
22
23 /* Number of coroutines to reserve per attached device model */
24 #define COROUTINE_POOL_RESERVATION 64
25
26 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
27
28 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
29
30 struct BlockBackend {
31 char *name;
32 int refcnt;
33 BdrvChild *root;
34 DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
35 QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
36 QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
37 BlockBackendPublic public;
38
39 void *dev; /* attached device model, if any */
40 /* TODO change to DeviceState when all users are qdevified */
41 const BlockDevOps *dev_ops;
42 void *dev_opaque;
43
44 /* the block size for which the guest device expects atomicity */
45 int guest_block_size;
46
47 /* If the BDS tree is removed, some of its options are stored here (which
48 * can be used to restore those options in the new BDS on insert) */
49 BlockBackendRootState root_state;
50
51 bool enable_write_cache;
52
53 /* I/O stats (display with "info blockstats"). */
54 BlockAcctStats stats;
55
56 BlockdevOnError on_read_error, on_write_error;
57 bool iostatus_enabled;
58 BlockDeviceIoStatus iostatus;
59
60 bool allow_write_beyond_eof;
61
62 NotifierList remove_bs_notifiers, insert_bs_notifiers;
63 };
64
65 typedef struct BlockBackendAIOCB {
66 BlockAIOCB common;
67 QEMUBH *bh;
68 BlockBackend *blk;
69 int ret;
70 } BlockBackendAIOCB;
71
72 static const AIOCBInfo block_backend_aiocb_info = {
73 .get_aio_context = blk_aiocb_get_aio_context,
74 .aiocb_size = sizeof(BlockBackendAIOCB),
75 };
76
77 static void drive_info_del(DriveInfo *dinfo);
78
79 /* All BlockBackends */
80 static QTAILQ_HEAD(, BlockBackend) block_backends =
81 QTAILQ_HEAD_INITIALIZER(block_backends);
82
83 /* All BlockBackends referenced by the monitor and which are iterated through by
84 * blk_next() */
85 static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
86 QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
87
88 static void blk_root_inherit_options(int *child_flags, QDict *child_options,
89 int parent_flags, QDict *parent_options)
90 {
91 /* We're not supposed to call this function for root nodes */
92 abort();
93 }
94 static void blk_root_drained_begin(BdrvChild *child);
95 static void blk_root_drained_end(BdrvChild *child);
96
97 static const BdrvChildRole child_root = {
98 .inherit_options = blk_root_inherit_options,
99
100 .drained_begin = blk_root_drained_begin,
101 .drained_end = blk_root_drained_end,
102 };
103
104 /*
105 * Create a new BlockBackend with a reference count of one.
106 * Store an error through @errp on failure, unless it's null.
107 * Return the new BlockBackend on success, null on failure.
108 */
109 BlockBackend *blk_new(Error **errp)
110 {
111 BlockBackend *blk;
112
113 blk = g_new0(BlockBackend, 1);
114 blk->refcnt = 1;
115 qemu_co_queue_init(&blk->public.throttled_reqs[0]);
116 qemu_co_queue_init(&blk->public.throttled_reqs[1]);
117
118 notifier_list_init(&blk->remove_bs_notifiers);
119 notifier_list_init(&blk->insert_bs_notifiers);
120
121 QTAILQ_INSERT_TAIL(&block_backends, blk, link);
122 return blk;
123 }
124
125 /*
126 * Create a new BlockBackend with a new BlockDriverState attached.
127 * Otherwise just like blk_new(), which see.
128 */
129 BlockBackend *blk_new_with_bs(Error **errp)
130 {
131 BlockBackend *blk;
132 BlockDriverState *bs;
133
134 blk = blk_new(errp);
135 if (!blk) {
136 return NULL;
137 }
138
139 bs = bdrv_new_root();
140 blk->root = bdrv_root_attach_child(bs, "root", &child_root);
141 blk->root->opaque = blk;
142 bs->blk = blk;
143 return blk;
144 }
145
146 /*
147 * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
148 *
149 * Just as with bdrv_open(), after having called this function the reference to
150 * @options belongs to the block layer (even on failure).
151 *
152 * TODO: Remove @filename and @flags; it should be possible to specify a whole
153 * BDS tree just by specifying the @options QDict (or @reference,
154 * alternatively). At the time of adding this function, this is not possible,
155 * though, so callers of this function have to be able to specify @filename and
156 * @flags.
157 */
158 BlockBackend *blk_new_open(const char *filename, const char *reference,
159 QDict *options, int flags, Error **errp)
160 {
161 BlockBackend *blk;
162 int ret;
163
164 blk = blk_new_with_bs(errp);
165 if (!blk) {
166 QDECREF(options);
167 return NULL;
168 }
169
170 ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
171 if (ret < 0) {
172 blk_unref(blk);
173 return NULL;
174 }
175
176 blk_set_enable_write_cache(blk, true);
177
178 return blk;
179 }
180
181 static void blk_delete(BlockBackend *blk)
182 {
183 assert(!blk->refcnt);
184 assert(!blk->name);
185 assert(!blk->dev);
186 if (blk->root) {
187 blk_remove_bs(blk);
188 }
189 assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
190 assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
191 if (blk->root_state.throttle_state) {
192 g_free(blk->root_state.throttle_group);
193 throttle_group_unref(blk->root_state.throttle_state);
194 }
195 QTAILQ_REMOVE(&block_backends, blk, link);
196 drive_info_del(blk->legacy_dinfo);
197 block_acct_cleanup(&blk->stats);
198 g_free(blk);
199 }
200
201 static void drive_info_del(DriveInfo *dinfo)
202 {
203 if (!dinfo) {
204 return;
205 }
206 qemu_opts_del(dinfo->opts);
207 g_free(dinfo->serial);
208 g_free(dinfo);
209 }
210
211 int blk_get_refcnt(BlockBackend *blk)
212 {
213 return blk ? blk->refcnt : 0;
214 }
215
216 /*
217 * Increment @blk's reference count.
218 * @blk must not be null.
219 */
220 void blk_ref(BlockBackend *blk)
221 {
222 blk->refcnt++;
223 }
224
225 /*
226 * Decrement @blk's reference count.
227 * If this drops it to zero, destroy @blk.
228 * For convenience, do nothing if @blk is null.
229 */
230 void blk_unref(BlockBackend *blk)
231 {
232 if (blk) {
233 assert(blk->refcnt > 0);
234 if (!--blk->refcnt) {
235 blk_delete(blk);
236 }
237 }
238 }
239
240 /*
241 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
242 * ones which are hidden (i.e. are not referenced by the monitor).
243 */
244 static BlockBackend *blk_all_next(BlockBackend *blk)
245 {
246 return blk ? QTAILQ_NEXT(blk, link)
247 : QTAILQ_FIRST(&block_backends);
248 }
249
250 void blk_remove_all_bs(void)
251 {
252 BlockBackend *blk = NULL;
253
254 while ((blk = blk_all_next(blk)) != NULL) {
255 AioContext *ctx = blk_get_aio_context(blk);
256
257 aio_context_acquire(ctx);
258 if (blk->root) {
259 blk_remove_bs(blk);
260 }
261 aio_context_release(ctx);
262 }
263 }
264
265 /*
266 * Return the monitor-owned BlockBackend after @blk.
267 * If @blk is null, return the first one.
268 * Else, return @blk's next sibling, which may be null.
269 *
270 * To iterate over all BlockBackends, do
271 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
272 * ...
273 * }
274 */
275 BlockBackend *blk_next(BlockBackend *blk)
276 {
277 return blk ? QTAILQ_NEXT(blk, monitor_link)
278 : QTAILQ_FIRST(&monitor_block_backends);
279 }
280
281 /*
282 * Iterates over all BlockDriverStates which are attached to a BlockBackend.
283 * This function is for use by bdrv_next().
284 *
285 * @bs must be NULL or a BDS that is attached to a BB.
286 */
287 BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
288 {
289 BlockBackend *blk;
290
291 if (bs) {
292 assert(bs->blk);
293 blk = bs->blk;
294 } else {
295 blk = NULL;
296 }
297
298 do {
299 blk = blk_all_next(blk);
300 } while (blk && !blk->root);
301
302 return blk ? blk->root->bs : NULL;
303 }
304
305 /*
306 * Add a BlockBackend into the list of backends referenced by the monitor, with
307 * the given @name acting as the handle for the monitor.
308 * Strictly for use by blockdev.c.
309 *
310 * @name must not be null or empty.
311 *
312 * Returns true on success and false on failure. In the latter case, an Error
313 * object is returned through @errp.
314 */
315 bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
316 {
317 assert(!blk->name);
318 assert(name && name[0]);
319
320 if (!id_wellformed(name)) {
321 error_setg(errp, "Invalid device name");
322 return false;
323 }
324 if (blk_by_name(name)) {
325 error_setg(errp, "Device with id '%s' already exists", name);
326 return false;
327 }
328 if (bdrv_find_node(name)) {
329 error_setg(errp,
330 "Device name '%s' conflicts with an existing node name",
331 name);
332 return false;
333 }
334
335 blk->name = g_strdup(name);
336 QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
337 return true;
338 }
339
340 /*
341 * Remove a BlockBackend from the list of backends referenced by the monitor.
342 * Strictly for use by blockdev.c.
343 */
344 void monitor_remove_blk(BlockBackend *blk)
345 {
346 if (!blk->name) {
347 return;
348 }
349
350 QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
351 g_free(blk->name);
352 blk->name = NULL;
353 }
354
355 /*
356 * Return @blk's name, a non-null string.
357 * Returns an empty string iff @blk is not referenced by the monitor.
358 */
359 const char *blk_name(BlockBackend *blk)
360 {
361 return blk->name ?: "";
362 }
363
364 /*
365 * Return the BlockBackend with name @name if it exists, else null.
366 * @name must not be null.
367 */
368 BlockBackend *blk_by_name(const char *name)
369 {
370 BlockBackend *blk = NULL;
371
372 assert(name);
373 while ((blk = blk_next(blk)) != NULL) {
374 if (!strcmp(name, blk->name)) {
375 return blk;
376 }
377 }
378 return NULL;
379 }
380
381 /*
382 * Return the BlockDriverState attached to @blk if any, else null.
383 */
384 BlockDriverState *blk_bs(BlockBackend *blk)
385 {
386 return blk->root ? blk->root->bs : NULL;
387 }
388
389 /*
390 * Return @blk's DriveInfo if any, else null.
391 */
392 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
393 {
394 return blk->legacy_dinfo;
395 }
396
397 /*
398 * Set @blk's DriveInfo to @dinfo, and return it.
399 * @blk must not have a DriveInfo set already.
400 * No other BlockBackend may have the same DriveInfo set.
401 */
402 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
403 {
404 assert(!blk->legacy_dinfo);
405 return blk->legacy_dinfo = dinfo;
406 }
407
408 /*
409 * Return the BlockBackend with DriveInfo @dinfo.
410 * It must exist.
411 */
412 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
413 {
414 BlockBackend *blk = NULL;
415
416 while ((blk = blk_next(blk)) != NULL) {
417 if (blk->legacy_dinfo == dinfo) {
418 return blk;
419 }
420 }
421 abort();
422 }
423
424 /*
425 * Returns a pointer to the publicly accessible fields of @blk.
426 */
427 BlockBackendPublic *blk_get_public(BlockBackend *blk)
428 {
429 return &blk->public;
430 }
431
432 /*
433 * Returns a BlockBackend given the associated @public fields.
434 */
435 BlockBackend *blk_by_public(BlockBackendPublic *public)
436 {
437 return container_of(public, BlockBackend, public);
438 }
439
440 /*
441 * Disassociates the currently associated BlockDriverState from @blk.
442 */
443 void blk_remove_bs(BlockBackend *blk)
444 {
445 assert(blk->root->bs->blk == blk);
446
447 notifier_list_notify(&blk->remove_bs_notifiers, blk);
448
449 blk_update_root_state(blk);
450 if (blk->public.throttle_state) {
451 blk_io_limits_disable(blk);
452 }
453
454 blk->root->bs->blk = NULL;
455 bdrv_root_unref_child(blk->root);
456 blk->root = NULL;
457 }
458
459 /*
460 * Associates a new BlockDriverState with @blk.
461 */
462 void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
463 {
464 assert(!blk->root && !bs->blk);
465 bdrv_ref(bs);
466 blk->root = bdrv_root_attach_child(bs, "root", &child_root);
467 blk->root->opaque = blk;
468 bs->blk = blk;
469
470 notifier_list_notify(&blk->insert_bs_notifiers, blk);
471 }
472
473 /*
474 * Attach device model @dev to @blk.
475 * Return 0 on success, -EBUSY when a device model is attached already.
476 */
477 int blk_attach_dev(BlockBackend *blk, void *dev)
478 /* TODO change to DeviceState *dev when all users are qdevified */
479 {
480 if (blk->dev) {
481 return -EBUSY;
482 }
483 blk_ref(blk);
484 blk->dev = dev;
485 blk_iostatus_reset(blk);
486 return 0;
487 }
488
489 /*
490 * Attach device model @dev to @blk.
491 * @blk must not have a device model attached already.
492 * TODO qdevified devices don't use this, remove when devices are qdevified
493 */
494 void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
495 {
496 if (blk_attach_dev(blk, dev) < 0) {
497 abort();
498 }
499 }
500
501 /*
502 * Detach device model @dev from @blk.
503 * @dev must be currently attached to @blk.
504 */
505 void blk_detach_dev(BlockBackend *blk, void *dev)
506 /* TODO change to DeviceState *dev when all users are qdevified */
507 {
508 assert(blk->dev == dev);
509 blk->dev = NULL;
510 blk->dev_ops = NULL;
511 blk->dev_opaque = NULL;
512 blk->guest_block_size = 512;
513 blk_unref(blk);
514 }
515
516 /*
517 * Return the device model attached to @blk if any, else null.
518 */
519 void *blk_get_attached_dev(BlockBackend *blk)
520 /* TODO change to return DeviceState * when all users are qdevified */
521 {
522 return blk->dev;
523 }
524
525 /*
526 * Set @blk's device model callbacks to @ops.
527 * @opaque is the opaque argument to pass to the callbacks.
528 * This is for use by device models.
529 */
530 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
531 void *opaque)
532 {
533 blk->dev_ops = ops;
534 blk->dev_opaque = opaque;
535 }
536
537 /*
538 * Notify @blk's attached device model of media change.
539 * If @load is true, notify of media load.
540 * Else, notify of media eject.
541 * Also send DEVICE_TRAY_MOVED events as appropriate.
542 */
543 void blk_dev_change_media_cb(BlockBackend *blk, bool load)
544 {
545 if (blk->dev_ops && blk->dev_ops->change_media_cb) {
546 bool tray_was_open, tray_is_open;
547
548 tray_was_open = blk_dev_is_tray_open(blk);
549 blk->dev_ops->change_media_cb(blk->dev_opaque, load);
550 tray_is_open = blk_dev_is_tray_open(blk);
551
552 if (tray_was_open != tray_is_open) {
553 qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
554 &error_abort);
555 }
556 }
557 }
558
559 /*
560 * Does @blk's attached device model have removable media?
561 * %true if no device model is attached.
562 */
563 bool blk_dev_has_removable_media(BlockBackend *blk)
564 {
565 return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
566 }
567
568 /*
569 * Does @blk's attached device model have a tray?
570 */
571 bool blk_dev_has_tray(BlockBackend *blk)
572 {
573 return blk->dev_ops && blk->dev_ops->is_tray_open;
574 }
575
576 /*
577 * Notify @blk's attached device model of a media eject request.
578 * If @force is true, the medium is about to be yanked out forcefully.
579 */
580 void blk_dev_eject_request(BlockBackend *blk, bool force)
581 {
582 if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
583 blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
584 }
585 }
586
587 /*
588 * Does @blk's attached device model have a tray, and is it open?
589 */
590 bool blk_dev_is_tray_open(BlockBackend *blk)
591 {
592 if (blk_dev_has_tray(blk)) {
593 return blk->dev_ops->is_tray_open(blk->dev_opaque);
594 }
595 return false;
596 }
597
598 /*
599 * Does @blk's attached device model have the medium locked?
600 * %false if the device model has no such lock.
601 */
602 bool blk_dev_is_medium_locked(BlockBackend *blk)
603 {
604 if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
605 return blk->dev_ops->is_medium_locked(blk->dev_opaque);
606 }
607 return false;
608 }
609
610 /*
611 * Notify @blk's attached device model of a backend size change.
612 */
613 void blk_dev_resize_cb(BlockBackend *blk)
614 {
615 if (blk->dev_ops && blk->dev_ops->resize_cb) {
616 blk->dev_ops->resize_cb(blk->dev_opaque);
617 }
618 }
619
620 void blk_iostatus_enable(BlockBackend *blk)
621 {
622 blk->iostatus_enabled = true;
623 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
624 }
625
626 /* The I/O status is only enabled if the drive explicitly
627 * enables it _and_ the VM is configured to stop on errors */
628 bool blk_iostatus_is_enabled(const BlockBackend *blk)
629 {
630 return (blk->iostatus_enabled &&
631 (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
632 blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
633 blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
634 }
635
636 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
637 {
638 return blk->iostatus;
639 }
640
641 void blk_iostatus_disable(BlockBackend *blk)
642 {
643 blk->iostatus_enabled = false;
644 }
645
646 void blk_iostatus_reset(BlockBackend *blk)
647 {
648 if (blk_iostatus_is_enabled(blk)) {
649 BlockDriverState *bs = blk_bs(blk);
650 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
651 if (bs && bs->job) {
652 block_job_iostatus_reset(bs->job);
653 }
654 }
655 }
656
657 void blk_iostatus_set_err(BlockBackend *blk, int error)
658 {
659 assert(blk_iostatus_is_enabled(blk));
660 if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
661 blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
662 BLOCK_DEVICE_IO_STATUS_FAILED;
663 }
664 }
665
666 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
667 {
668 blk->allow_write_beyond_eof = allow;
669 }
670
671 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
672 size_t size)
673 {
674 int64_t len;
675
676 if (size > INT_MAX) {
677 return -EIO;
678 }
679
680 if (!blk_is_available(blk)) {
681 return -ENOMEDIUM;
682 }
683
684 if (offset < 0) {
685 return -EIO;
686 }
687
688 if (!blk->allow_write_beyond_eof) {
689 len = blk_getlength(blk);
690 if (len < 0) {
691 return len;
692 }
693
694 if (offset > len || len - offset < size) {
695 return -EIO;
696 }
697 }
698
699 return 0;
700 }
701
702 static int blk_check_request(BlockBackend *blk, int64_t sector_num,
703 int nb_sectors)
704 {
705 if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
706 return -EIO;
707 }
708
709 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
710 return -EIO;
711 }
712
713 return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
714 nb_sectors * BDRV_SECTOR_SIZE);
715 }
716
717 static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
718 unsigned int bytes, QEMUIOVector *qiov,
719 BdrvRequestFlags flags)
720 {
721 int ret = blk_check_byte_request(blk, offset, bytes);
722 if (ret < 0) {
723 return ret;
724 }
725
726 /* throttling disk I/O */
727 if (blk->public.throttle_state) {
728 throttle_group_co_io_limits_intercept(blk, bytes, false);
729 }
730
731 return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
732 }
733
734 static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
735 unsigned int bytes, QEMUIOVector *qiov,
736 BdrvRequestFlags flags)
737 {
738 int ret;
739
740 ret = blk_check_byte_request(blk, offset, bytes);
741 if (ret < 0) {
742 return ret;
743 }
744
745 /* throttling disk I/O */
746 if (blk->public.throttle_state) {
747 throttle_group_co_io_limits_intercept(blk, bytes, true);
748 }
749
750 if (!blk->enable_write_cache) {
751 flags |= BDRV_REQ_FUA;
752 }
753
754 return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
755 }
756
757 typedef struct BlkRwCo {
758 BlockBackend *blk;
759 int64_t offset;
760 QEMUIOVector *qiov;
761 int ret;
762 BdrvRequestFlags flags;
763 } BlkRwCo;
764
765 static void blk_read_entry(void *opaque)
766 {
767 BlkRwCo *rwco = opaque;
768
769 rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
770 rwco->qiov, rwco->flags);
771 }
772
773 static void blk_write_entry(void *opaque)
774 {
775 BlkRwCo *rwco = opaque;
776
777 rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
778 rwco->qiov, rwco->flags);
779 }
780
781 static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
782 int64_t bytes, CoroutineEntry co_entry,
783 BdrvRequestFlags flags)
784 {
785 AioContext *aio_context;
786 QEMUIOVector qiov;
787 struct iovec iov;
788 Coroutine *co;
789 BlkRwCo rwco;
790
791 iov = (struct iovec) {
792 .iov_base = buf,
793 .iov_len = bytes,
794 };
795 qemu_iovec_init_external(&qiov, &iov, 1);
796
797 rwco = (BlkRwCo) {
798 .blk = blk,
799 .offset = offset,
800 .qiov = &qiov,
801 .flags = flags,
802 .ret = NOT_DONE,
803 };
804
805 co = qemu_coroutine_create(co_entry);
806 qemu_coroutine_enter(co, &rwco);
807
808 aio_context = blk_get_aio_context(blk);
809 while (rwco.ret == NOT_DONE) {
810 aio_poll(aio_context, true);
811 }
812
813 return rwco.ret;
814 }
815
816 int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
817 int count)
818 {
819 int ret;
820
821 ret = blk_check_byte_request(blk, offset, count);
822 if (ret < 0) {
823 return ret;
824 }
825
826 blk_root_drained_begin(blk->root);
827 ret = blk_pread(blk, offset, buf, count);
828 blk_root_drained_end(blk->root);
829 return ret;
830 }
831
832 int blk_write_zeroes(BlockBackend *blk, int64_t offset,
833 int count, BdrvRequestFlags flags)
834 {
835 return blk_prw(blk, offset, NULL, count, blk_write_entry,
836 flags | BDRV_REQ_ZERO_WRITE);
837 }
838
839 static void error_callback_bh(void *opaque)
840 {
841 struct BlockBackendAIOCB *acb = opaque;
842 qemu_bh_delete(acb->bh);
843 acb->common.cb(acb->common.opaque, acb->ret);
844 qemu_aio_unref(acb);
845 }
846
847 BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
848 BlockCompletionFunc *cb,
849 void *opaque, int ret)
850 {
851 struct BlockBackendAIOCB *acb;
852 QEMUBH *bh;
853
854 acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
855 acb->blk = blk;
856 acb->ret = ret;
857
858 bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
859 acb->bh = bh;
860 qemu_bh_schedule(bh);
861
862 return &acb->common;
863 }
864
865 typedef struct BlkAioEmAIOCB {
866 BlockAIOCB common;
867 BlkRwCo rwco;
868 int bytes;
869 bool has_returned;
870 QEMUBH* bh;
871 } BlkAioEmAIOCB;
872
873 static const AIOCBInfo blk_aio_em_aiocb_info = {
874 .aiocb_size = sizeof(BlkAioEmAIOCB),
875 };
876
877 static void blk_aio_complete(BlkAioEmAIOCB *acb)
878 {
879 if (acb->bh) {
880 assert(acb->has_returned);
881 qemu_bh_delete(acb->bh);
882 }
883 if (acb->has_returned) {
884 acb->common.cb(acb->common.opaque, acb->rwco.ret);
885 qemu_aio_unref(acb);
886 }
887 }
888
889 static void blk_aio_complete_bh(void *opaque)
890 {
891 blk_aio_complete(opaque);
892 }
893
894 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
895 QEMUIOVector *qiov, CoroutineEntry co_entry,
896 BdrvRequestFlags flags,
897 BlockCompletionFunc *cb, void *opaque)
898 {
899 BlkAioEmAIOCB *acb;
900 Coroutine *co;
901
902 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
903 acb->rwco = (BlkRwCo) {
904 .blk = blk,
905 .offset = offset,
906 .qiov = qiov,
907 .flags = flags,
908 .ret = NOT_DONE,
909 };
910 acb->bytes = bytes;
911 acb->bh = NULL;
912 acb->has_returned = false;
913
914 co = qemu_coroutine_create(co_entry);
915 qemu_coroutine_enter(co, acb);
916
917 acb->has_returned = true;
918 if (acb->rwco.ret != NOT_DONE) {
919 acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
920 qemu_bh_schedule(acb->bh);
921 }
922
923 return &acb->common;
924 }
925
926 static void blk_aio_read_entry(void *opaque)
927 {
928 BlkAioEmAIOCB *acb = opaque;
929 BlkRwCo *rwco = &acb->rwco;
930
931 assert(rwco->qiov->size == acb->bytes);
932 rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
933 rwco->qiov, rwco->flags);
934 blk_aio_complete(acb);
935 }
936
937 static void blk_aio_write_entry(void *opaque)
938 {
939 BlkAioEmAIOCB *acb = opaque;
940 BlkRwCo *rwco = &acb->rwco;
941
942 assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
943 rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
944 rwco->qiov, rwco->flags);
945 blk_aio_complete(acb);
946 }
947
948 BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
949 int count, BdrvRequestFlags flags,
950 BlockCompletionFunc *cb, void *opaque)
951 {
952 return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
953 flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
954 }
955
956 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
957 {
958 int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
959 if (ret < 0) {
960 return ret;
961 }
962 return count;
963 }
964
965 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
966 BdrvRequestFlags flags)
967 {
968 int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
969 flags);
970 if (ret < 0) {
971 return ret;
972 }
973 return count;
974 }
975
976 int64_t blk_getlength(BlockBackend *blk)
977 {
978 if (!blk_is_available(blk)) {
979 return -ENOMEDIUM;
980 }
981
982 return bdrv_getlength(blk_bs(blk));
983 }
984
985 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
986 {
987 if (!blk_bs(blk)) {
988 *nb_sectors_ptr = 0;
989 } else {
990 bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
991 }
992 }
993
994 int64_t blk_nb_sectors(BlockBackend *blk)
995 {
996 if (!blk_is_available(blk)) {
997 return -ENOMEDIUM;
998 }
999
1000 return bdrv_nb_sectors(blk_bs(blk));
1001 }
1002
1003 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
1004 QEMUIOVector *qiov, BdrvRequestFlags flags,
1005 BlockCompletionFunc *cb, void *opaque)
1006 {
1007 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1008 blk_aio_read_entry, flags, cb, opaque);
1009 }
1010
1011 BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
1012 QEMUIOVector *qiov, BdrvRequestFlags flags,
1013 BlockCompletionFunc *cb, void *opaque)
1014 {
1015 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1016 blk_aio_write_entry, flags, cb, opaque);
1017 }
1018
1019 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
1020 BlockCompletionFunc *cb, void *opaque)
1021 {
1022 if (!blk_is_available(blk)) {
1023 return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
1024 }
1025
1026 return bdrv_aio_flush(blk_bs(blk), cb, opaque);
1027 }
1028
1029 BlockAIOCB *blk_aio_discard(BlockBackend *blk,
1030 int64_t sector_num, int nb_sectors,
1031 BlockCompletionFunc *cb, void *opaque)
1032 {
1033 int ret = blk_check_request(blk, sector_num, nb_sectors);
1034 if (ret < 0) {
1035 return blk_abort_aio_request(blk, cb, opaque, ret);
1036 }
1037
1038 return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
1039 }
1040
1041 void blk_aio_cancel(BlockAIOCB *acb)
1042 {
1043 bdrv_aio_cancel(acb);
1044 }
1045
1046 void blk_aio_cancel_async(BlockAIOCB *acb)
1047 {
1048 bdrv_aio_cancel_async(acb);
1049 }
1050
1051 int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
1052 {
1053 int i, ret;
1054
1055 for (i = 0; i < num_reqs; i++) {
1056 ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
1057 if (ret < 0) {
1058 return ret;
1059 }
1060 }
1061
1062 return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
1063 }
1064
1065 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1066 {
1067 if (!blk_is_available(blk)) {
1068 return -ENOMEDIUM;
1069 }
1070
1071 return bdrv_ioctl(blk_bs(blk), req, buf);
1072 }
1073
1074 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
1075 BlockCompletionFunc *cb, void *opaque)
1076 {
1077 if (!blk_is_available(blk)) {
1078 return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
1079 }
1080
1081 return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
1082 }
1083
1084 int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
1085 {
1086 int ret = blk_check_request(blk, sector_num, nb_sectors);
1087 if (ret < 0) {
1088 return ret;
1089 }
1090
1091 return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
1092 }
1093
1094 int blk_co_flush(BlockBackend *blk)
1095 {
1096 if (!blk_is_available(blk)) {
1097 return -ENOMEDIUM;
1098 }
1099
1100 return bdrv_co_flush(blk_bs(blk));
1101 }
1102
1103 int blk_flush(BlockBackend *blk)
1104 {
1105 if (!blk_is_available(blk)) {
1106 return -ENOMEDIUM;
1107 }
1108
1109 return bdrv_flush(blk_bs(blk));
1110 }
1111
1112 void blk_drain(BlockBackend *blk)
1113 {
1114 if (blk_bs(blk)) {
1115 bdrv_drain(blk_bs(blk));
1116 }
1117 }
1118
/*
 * Thin wrapper around bdrv_drain_all().
 */
void blk_drain_all(void)
{
    bdrv_drain_all();
}
1123
1124 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
1125 BlockdevOnError on_write_error)
1126 {
1127 blk->on_read_error = on_read_error;
1128 blk->on_write_error = on_write_error;
1129 }
1130
1131 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
1132 {
1133 return is_read ? blk->on_read_error : blk->on_write_error;
1134 }
1135
1136 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
1137 int error)
1138 {
1139 BlockdevOnError on_err = blk_get_on_error(blk, is_read);
1140
1141 switch (on_err) {
1142 case BLOCKDEV_ON_ERROR_ENOSPC:
1143 return (error == ENOSPC) ?
1144 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
1145 case BLOCKDEV_ON_ERROR_STOP:
1146 return BLOCK_ERROR_ACTION_STOP;
1147 case BLOCKDEV_ON_ERROR_REPORT:
1148 return BLOCK_ERROR_ACTION_REPORT;
1149 case BLOCKDEV_ON_ERROR_IGNORE:
1150 return BLOCK_ERROR_ACTION_IGNORE;
1151 default:
1152 abort();
1153 }
1154 }
1155
1156 static void send_qmp_error_event(BlockBackend *blk,
1157 BlockErrorAction action,
1158 bool is_read, int error)
1159 {
1160 IoOperationType optype;
1161
1162 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
1163 qapi_event_send_block_io_error(blk_name(blk), optype, action,
1164 blk_iostatus_is_enabled(blk),
1165 error == ENOSPC, strerror(error),
1166 &error_abort);
1167 }
1168
1169 /* This is done by device models because, while the block layer knows
1170 * about the error, it does not know whether an operation comes from
1171 * the device or the block layer (from a job, for example).
1172 */
1173 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
1174 bool is_read, int error)
1175 {
1176 assert(error >= 0);
1177
1178 if (action == BLOCK_ERROR_ACTION_STOP) {
1179 /* First set the iostatus, so that "info block" returns an iostatus
1180 * that matches the events raised so far (an additional error iostatus
1181 * is fine, but not a lost one).
1182 */
1183 blk_iostatus_set_err(blk, error);
1184
1185 /* Then raise the request to stop the VM and the event.
1186 * qemu_system_vmstop_request_prepare has two effects. First,
1187 * it ensures that the STOP event always comes after the
1188 * BLOCK_IO_ERROR event. Second, it ensures that even if management
1189 * can observe the STOP event and do a "cont" before the STOP
1190 * event is issued, the VM will not stop. In this case, vm_start()
1191 * also ensures that the STOP/RESUME pair of events is emitted.
1192 */
1193 qemu_system_vmstop_request_prepare();
1194 send_qmp_error_event(blk, action, is_read, error);
1195 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
1196 } else {
1197 send_qmp_error_event(blk, action, is_read, error);
1198 }
1199 }
1200
1201 int blk_is_read_only(BlockBackend *blk)
1202 {
1203 BlockDriverState *bs = blk_bs(blk);
1204
1205 if (bs) {
1206 return bdrv_is_read_only(bs);
1207 } else {
1208 return blk->root_state.read_only;
1209 }
1210 }
1211
1212 int blk_is_sg(BlockBackend *blk)
1213 {
1214 BlockDriverState *bs = blk_bs(blk);
1215
1216 if (!bs) {
1217 return 0;
1218 }
1219
1220 return bdrv_is_sg(bs);
1221 }
1222
1223 int blk_enable_write_cache(BlockBackend *blk)
1224 {
1225 return blk->enable_write_cache;
1226 }
1227
1228 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
1229 {
1230 blk->enable_write_cache = wce;
1231 }
1232
1233 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
1234 {
1235 BlockDriverState *bs = blk_bs(blk);
1236
1237 if (!bs) {
1238 error_setg(errp, "Device '%s' has no medium", blk->name);
1239 return;
1240 }
1241
1242 bdrv_invalidate_cache(bs, errp);
1243 }
1244
1245 bool blk_is_inserted(BlockBackend *blk)
1246 {
1247 BlockDriverState *bs = blk_bs(blk);
1248
1249 return bs && bdrv_is_inserted(bs);
1250 }
1251
1252 bool blk_is_available(BlockBackend *blk)
1253 {
1254 return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
1255 }
1256
1257 void blk_lock_medium(BlockBackend *blk, bool locked)
1258 {
1259 BlockDriverState *bs = blk_bs(blk);
1260
1261 if (bs) {
1262 bdrv_lock_medium(bs, locked);
1263 }
1264 }
1265
1266 void blk_eject(BlockBackend *blk, bool eject_flag)
1267 {
1268 BlockDriverState *bs = blk_bs(blk);
1269
1270 if (bs) {
1271 bdrv_eject(bs, eject_flag);
1272 }
1273 }
1274
1275 int blk_get_flags(BlockBackend *blk)
1276 {
1277 BlockDriverState *bs = blk_bs(blk);
1278
1279 if (bs) {
1280 return bdrv_get_flags(bs);
1281 } else {
1282 return blk->root_state.open_flags;
1283 }
1284 }
1285
1286 int blk_get_max_transfer_length(BlockBackend *blk)
1287 {
1288 BlockDriverState *bs = blk_bs(blk);
1289
1290 if (bs) {
1291 return bs->bl.max_transfer_length;
1292 } else {
1293 return 0;
1294 }
1295 }
1296
1297 int blk_get_max_iov(BlockBackend *blk)
1298 {
1299 return blk->root->bs->bl.max_iov;
1300 }
1301
1302 void blk_set_guest_block_size(BlockBackend *blk, int align)
1303 {
1304 blk->guest_block_size = align;
1305 }
1306
1307 void *blk_try_blockalign(BlockBackend *blk, size_t size)
1308 {
1309 return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
1310 }
1311
1312 void *blk_blockalign(BlockBackend *blk, size_t size)
1313 {
1314 return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
1315 }
1316
1317 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
1318 {
1319 BlockDriverState *bs = blk_bs(blk);
1320
1321 if (!bs) {
1322 return false;
1323 }
1324
1325 return bdrv_op_is_blocked(bs, op, errp);
1326 }
1327
1328 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
1329 {
1330 BlockDriverState *bs = blk_bs(blk);
1331
1332 if (bs) {
1333 bdrv_op_unblock(bs, op, reason);
1334 }
1335 }
1336
1337 void blk_op_block_all(BlockBackend *blk, Error *reason)
1338 {
1339 BlockDriverState *bs = blk_bs(blk);
1340
1341 if (bs) {
1342 bdrv_op_block_all(bs, reason);
1343 }
1344 }
1345
1346 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
1347 {
1348 BlockDriverState *bs = blk_bs(blk);
1349
1350 if (bs) {
1351 bdrv_op_unblock_all(bs, reason);
1352 }
1353 }
1354
1355 AioContext *blk_get_aio_context(BlockBackend *blk)
1356 {
1357 BlockDriverState *bs = blk_bs(blk);
1358
1359 if (bs) {
1360 return bdrv_get_aio_context(bs);
1361 } else {
1362 return qemu_get_aio_context();
1363 }
1364 }
1365
1366 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
1367 {
1368 BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
1369 return blk_get_aio_context(blk_acb->blk);
1370 }
1371
1372 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
1373 {
1374 BlockDriverState *bs = blk_bs(blk);
1375
1376 if (bs) {
1377 bdrv_set_aio_context(bs, new_context);
1378 }
1379 }
1380
1381 void blk_add_aio_context_notifier(BlockBackend *blk,
1382 void (*attached_aio_context)(AioContext *new_context, void *opaque),
1383 void (*detach_aio_context)(void *opaque), void *opaque)
1384 {
1385 BlockDriverState *bs = blk_bs(blk);
1386
1387 if (bs) {
1388 bdrv_add_aio_context_notifier(bs, attached_aio_context,
1389 detach_aio_context, opaque);
1390 }
1391 }
1392
1393 void blk_remove_aio_context_notifier(BlockBackend *blk,
1394 void (*attached_aio_context)(AioContext *,
1395 void *),
1396 void (*detach_aio_context)(void *),
1397 void *opaque)
1398 {
1399 BlockDriverState *bs = blk_bs(blk);
1400
1401 if (bs) {
1402 bdrv_remove_aio_context_notifier(bs, attached_aio_context,
1403 detach_aio_context, opaque);
1404 }
1405 }
1406
1407 void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
1408 {
1409 notifier_list_add(&blk->remove_bs_notifiers, notify);
1410 }
1411
1412 void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
1413 {
1414 notifier_list_add(&blk->insert_bs_notifiers, notify);
1415 }
1416
1417 void blk_io_plug(BlockBackend *blk)
1418 {
1419 BlockDriverState *bs = blk_bs(blk);
1420
1421 if (bs) {
1422 bdrv_io_plug(bs);
1423 }
1424 }
1425
1426 void blk_io_unplug(BlockBackend *blk)
1427 {
1428 BlockDriverState *bs = blk_bs(blk);
1429
1430 if (bs) {
1431 bdrv_io_unplug(bs);
1432 }
1433 }
1434
1435 BlockAcctStats *blk_get_stats(BlockBackend *blk)
1436 {
1437 return &blk->stats;
1438 }
1439
1440 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
1441 BlockCompletionFunc *cb, void *opaque)
1442 {
1443 return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
1444 }
1445
1446 int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
1447 int count, BdrvRequestFlags flags)
1448 {
1449 return blk_co_pwritev(blk, offset, count, NULL,
1450 flags | BDRV_REQ_ZERO_WRITE);
1451 }
1452
1453 int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
1454 const uint8_t *buf, int nb_sectors)
1455 {
1456 int ret = blk_check_request(blk, sector_num, nb_sectors);
1457 if (ret < 0) {
1458 return ret;
1459 }
1460
1461 return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
1462 }
1463
1464 int blk_truncate(BlockBackend *blk, int64_t offset)
1465 {
1466 if (!blk_is_available(blk)) {
1467 return -ENOMEDIUM;
1468 }
1469
1470 return bdrv_truncate(blk_bs(blk), offset);
1471 }
1472
1473 int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
1474 {
1475 int ret = blk_check_request(blk, sector_num, nb_sectors);
1476 if (ret < 0) {
1477 return ret;
1478 }
1479
1480 return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
1481 }
1482
1483 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
1484 int64_t pos, int size)
1485 {
1486 int ret;
1487
1488 if (!blk_is_available(blk)) {
1489 return -ENOMEDIUM;
1490 }
1491
1492 ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
1493 if (ret < 0) {
1494 return ret;
1495 }
1496
1497 if (ret == size && !blk->enable_write_cache) {
1498 ret = bdrv_flush(blk_bs(blk));
1499 }
1500
1501 return ret < 0 ? ret : size;
1502 }
1503
1504 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
1505 {
1506 if (!blk_is_available(blk)) {
1507 return -ENOMEDIUM;
1508 }
1509
1510 return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
1511 }
1512
1513 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
1514 {
1515 if (!blk_is_available(blk)) {
1516 return -ENOMEDIUM;
1517 }
1518
1519 return bdrv_probe_blocksizes(blk_bs(blk), bsz);
1520 }
1521
1522 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
1523 {
1524 if (!blk_is_available(blk)) {
1525 return -ENOMEDIUM;
1526 }
1527
1528 return bdrv_probe_geometry(blk_bs(blk), geo);
1529 }
1530
1531 /*
1532 * Updates the BlockBackendRootState object with data from the currently
1533 * attached BlockDriverState.
1534 */
1535 void blk_update_root_state(BlockBackend *blk)
1536 {
1537 assert(blk->root);
1538
1539 blk->root_state.open_flags = blk->root->bs->open_flags;
1540 blk->root_state.read_only = blk->root->bs->read_only;
1541 blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
1542
1543 if (blk->root_state.throttle_group) {
1544 g_free(blk->root_state.throttle_group);
1545 throttle_group_unref(blk->root_state.throttle_state);
1546 }
1547 if (blk->public.throttle_state) {
1548 const char *name = throttle_group_get_name(blk);
1549 blk->root_state.throttle_group = g_strdup(name);
1550 blk->root_state.throttle_state = throttle_group_incref(name);
1551 } else {
1552 blk->root_state.throttle_group = NULL;
1553 blk->root_state.throttle_state = NULL;
1554 }
1555 }
1556
1557 /*
1558 * Applies the information in the root state to the given BlockDriverState. This
1559 * does not include the flags which have to be specified for bdrv_open(), use
1560 * blk_get_open_flags_from_root_state() to inquire them.
1561 */
1562 void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
1563 {
1564 bs->detect_zeroes = blk->root_state.detect_zeroes;
1565 if (blk->root_state.throttle_group) {
1566 blk_io_limits_enable(blk, blk->root_state.throttle_group);
1567 }
1568 }
1569
1570 /*
1571 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
1572 * supposed to inherit the root state.
1573 */
1574 int blk_get_open_flags_from_root_state(BlockBackend *blk)
1575 {
1576 int bs_flags;
1577
1578 bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
1579 bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
1580
1581 return bs_flags;
1582 }
1583
1584 BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
1585 {
1586 return &blk->root_state;
1587 }
1588
1589 int blk_commit_all(void)
1590 {
1591 BlockBackend *blk = NULL;
1592
1593 while ((blk = blk_all_next(blk)) != NULL) {
1594 AioContext *aio_context = blk_get_aio_context(blk);
1595
1596 aio_context_acquire(aio_context);
1597 if (blk_is_inserted(blk) && blk->root->bs->backing) {
1598 int ret = bdrv_commit(blk->root->bs);
1599 if (ret < 0) {
1600 aio_context_release(aio_context);
1601 return ret;
1602 }
1603 }
1604 aio_context_release(aio_context);
1605 }
1606 return 0;
1607 }
1608
1609 int blk_flush_all(void)
1610 {
1611 BlockBackend *blk = NULL;
1612 int result = 0;
1613
1614 while ((blk = blk_all_next(blk)) != NULL) {
1615 AioContext *aio_context = blk_get_aio_context(blk);
1616 int ret;
1617
1618 aio_context_acquire(aio_context);
1619 if (blk_is_inserted(blk)) {
1620 ret = blk_flush(blk);
1621 if (ret < 0 && !result) {
1622 result = ret;
1623 }
1624 }
1625 aio_context_release(aio_context);
1626 }
1627
1628 return result;
1629 }
1630
1631
1632 /* throttling disk I/O limits */
1633 void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
1634 {
1635 throttle_group_config(blk, cfg);
1636 }
1637
1638 void blk_io_limits_disable(BlockBackend *blk)
1639 {
1640 assert(blk->public.throttle_state);
1641 bdrv_drained_begin(blk_bs(blk));
1642 throttle_group_unregister_blk(blk);
1643 bdrv_drained_end(blk_bs(blk));
1644 }
1645
1646 /* should be called before blk_set_io_limits if a limit is set */
1647 void blk_io_limits_enable(BlockBackend *blk, const char *group)
1648 {
1649 assert(!blk->public.throttle_state);
1650 throttle_group_register_blk(blk, group);
1651 }
1652
1653 void blk_io_limits_update_group(BlockBackend *blk, const char *group)
1654 {
1655 /* this BB is not part of any group */
1656 if (!blk->public.throttle_state) {
1657 return;
1658 }
1659
1660 /* this BB is a part of the same group than the one we want */
1661 if (!g_strcmp0(throttle_group_get_name(blk), group)) {
1662 return;
1663 }
1664
1665 /* need to change the group this bs belong to */
1666 blk_io_limits_disable(blk);
1667 blk_io_limits_enable(blk, group);
1668 }
1669
1670 static void blk_root_drained_begin(BdrvChild *child)
1671 {
1672 BlockBackend *blk = child->opaque;
1673
1674 if (blk->public.io_limits_disabled++ == 0) {
1675 throttle_group_restart_blk(blk);
1676 }
1677 }
1678
1679 static void blk_root_drained_end(BdrvChild *child)
1680 {
1681 BlockBackend *blk = child->opaque;
1682
1683 assert(blk->public.io_limits_disabled);
1684 --blk->public.io_limits_disabled;
1685 }