Update VERSION for v7.2.0-rc4
[qemu.git] / block.c
1 /*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2020 Virtuozzo International GmbH.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27 #include "block/trace.h"
28 #include "block/block_int.h"
29 #include "block/blockjob.h"
30 #include "block/fuse.h"
31 #include "block/nbd.h"
32 #include "block/qdict.h"
33 #include "qemu/error-report.h"
34 #include "block/module_block.h"
35 #include "qemu/main-loop.h"
36 #include "qemu/module.h"
37 #include "qapi/error.h"
38 #include "qapi/qmp/qdict.h"
39 #include "qapi/qmp/qjson.h"
40 #include "qapi/qmp/qnull.h"
41 #include "qapi/qmp/qstring.h"
42 #include "qapi/qobject-output-visitor.h"
43 #include "qapi/qapi-visit-block-core.h"
44 #include "sysemu/block-backend.h"
45 #include "qemu/notify.h"
46 #include "qemu/option.h"
47 #include "qemu/coroutine.h"
48 #include "block/qapi.h"
49 #include "qemu/timer.h"
50 #include "qemu/cutils.h"
51 #include "qemu/id.h"
52 #include "qemu/range.h"
53 #include "qemu/rcu.h"
54 #include "block/coroutines.h"
55
56 #ifdef CONFIG_BSD
57 #include <sys/ioctl.h>
58 #include <sys/queue.h>
59 #if defined(HAVE_SYS_DISK_H)
60 #include <sys/disk.h>
61 #endif
62 #endif
63
64 #ifdef _WIN32
65 #include <windows.h>
66 #endif
67
/*
 * Sentinel placed in a result field before a coroutine is started; the
 * synchronous caller polls until the field holds a different value.
 */
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

/* Protected by BQL */
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

/*
 * Protected by BQL
 * bdrv_new() appends every node it allocates to this list.
 */
static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(all_bdrv_states);

/*
 * Protected by BQL
 * List of block drivers; bdrv_register() inserts new entries at the head.
 */
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* Forward declarations of static helpers referenced before their definition */
static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           const char *reference,
                                           QDict *options, int flags,
                                           BlockDriverState *parent,
                                           const BdrvChildClass *child_class,
                                           BdrvChildRole child_role,
                                           Error **errp);

static bool bdrv_recurse_has_child(BlockDriverState *bs,
                                   BlockDriverState *child);

static void bdrv_child_free(BdrvChild *child);
static void bdrv_replace_child_noperm(BdrvChild **child,
                                      BlockDriverState *new_bs,
                                      bool free_empty_child);
static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
                                              BdrvChild *child,
                                              Transaction *tran);
static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
                                            Transaction *tran);

static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue,
                               Transaction *change_child_tran, Error **errp);
static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
static void bdrv_reopen_abort(BDRVReopenState *reopen_state);

static bool bdrv_backing_overridden(BlockDriverState *bs);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
113
114 #ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter immediately followed
 * by ':' (for example "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int lower = letter >= 'a' && letter <= 'z';
    int upper = letter >= 'A' && letter <= 'Z';

    if (!lower && !upper) {
        return 0;
    }
    return filename[1] == ':';
}
121
122 int is_windows_drive(const char *filename)
123 {
124 if (is_windows_drive_prefix(filename) &&
125 filename[2] == '\0')
126 return 1;
127 if (strstart(filename, "\\\\.\\", NULL) ||
128 strstart(filename, "//./", NULL))
129 return 1;
130 return 0;
131 }
132 #endif
133
134 size_t bdrv_opt_mem_align(BlockDriverState *bs)
135 {
136 if (!bs || !bs->drv) {
137 /* page size or 4k (hdd sector size) should be on the safe side */
138 return MAX(4096, qemu_real_host_page_size());
139 }
140 IO_CODE();
141
142 return bs->bl.opt_mem_alignment;
143 }
144
145 size_t bdrv_min_mem_align(BlockDriverState *bs)
146 {
147 if (!bs || !bs->drv) {
148 /* page size or 4k (hdd sector size) should be on the safe side */
149 return MAX(4096, qemu_real_host_page_size());
150 }
151 IO_CODE();
152
153 return bs->bl.min_mem_alignment;
154 }
155
/*
 * Report whether @path starts with "<protocol>:", i.e. whether a ':' is
 * found before the first path separator.  On Windows, drive specifications
 * are never treated as protocols.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    const char *delimiters = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    const char *delimiters = ":/";
#endif

    /* Index of the first delimiter, or of the terminating NUL if none */
    return path[strcspn(path, delimiters)] == ':';
}
173
/*
 * Return non-zero if @path is absolute: it starts with '/', or on Windows
 * also with '\\' or a drive specification.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
186
/*
 * Build a newly allocated path for @filename relative to @base_path.
 *
 * An absolute @filename is simply duplicated.  Otherwise the result is the
 * leading part of @base_path -- up to and including its last directory
 * separator, or its "<protocol>:" prefix if that extends further --
 * followed by @filename.  URLs are supported.
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *keep_end = base_path; /* one past the part of base_path kept */
    const char *last_sep;
    char *combined;
    int keep_len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');
        if (colon) {
            keep_end = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!last_sep || backslash > last_sep) {
            last_sep = backslash;
        }
    }
#endif
    /* Keep whichever reaches further: the protocol prefix or the directory */
    if (last_sep && last_sep + 1 > keep_end) {
        keep_end = last_sep + 1;
    }
    keep_len = keep_end - base_path;

    combined = g_malloc(keep_len + strlen(filename) + 1);
    memcpy(combined, base_path, keep_len);
    strcpy(combined + keep_len, filename);

    return combined;
}
235
236 /*
237 * Helper function for bdrv_parse_filename() implementations to remove optional
238 * protocol prefixes (especially "file:") from a filename and for putting the
239 * stripped filename into the options QDict if there is such a prefix.
240 */
241 void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
242 QDict *options)
243 {
244 if (strstart(filename, prefix, &filename)) {
245 /* Stripping the explicit protocol prefix may result in a protocol
246 * prefix being (wrongly) detected (if the filename contains a colon) */
247 if (path_has_protocol(filename)) {
248 GString *fat_filename;
249
250 /* This means there is some colon before the first slash; therefore,
251 * this cannot be an absolute path */
252 assert(!path_is_absolute(filename));
253
254 /* And we can thus fix the protocol detection issue by prefixing it
255 * by "./" */
256 fat_filename = g_string_new("./");
257 g_string_append(fat_filename, filename);
258
259 assert(!path_has_protocol(fat_filename->str));
260
261 qdict_put(options, "filename",
262 qstring_from_gstring(fat_filename));
263 } else {
264 /* If no protocol prefix was detected, we can use the shortened
265 * filename as-is */
266 qdict_put_str(options, "filename", filename);
267 }
268 }
269 }
270
271
272 /* Returns whether the image file is opened as read-only. Note that this can
273 * return false and writing to the image file is still not possible because the
274 * image is inactivated. */
275 bool bdrv_is_read_only(BlockDriverState *bs)
276 {
277 IO_CODE();
278 return !(bs->open_flags & BDRV_O_RDWR);
279 }
280
281 int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
282 bool ignore_allow_rdw, Error **errp)
283 {
284 IO_CODE();
285
286 /* Do not set read_only if copy_on_read is enabled */
287 if (bs->copy_on_read && read_only) {
288 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
289 bdrv_get_device_or_node_name(bs));
290 return -EINVAL;
291 }
292
293 /* Do not clear read_only if it is prohibited */
294 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
295 !ignore_allow_rdw)
296 {
297 error_setg(errp, "Node '%s' is read only",
298 bdrv_get_device_or_node_name(bs));
299 return -EPERM;
300 }
301
302 return 0;
303 }
304
305 /*
306 * Called by a driver that can only provide a read-only image.
307 *
308 * Returns 0 if the node is already read-only or it could switch the node to
309 * read-only because BDRV_O_AUTO_RDONLY is set.
310 *
311 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
312 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
313 * is not NULL, it is used as the error message for the Error object.
314 */
315 int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
316 Error **errp)
317 {
318 int ret = 0;
319 IO_CODE();
320
321 if (!(bs->open_flags & BDRV_O_RDWR)) {
322 return 0;
323 }
324 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
325 goto fail;
326 }
327
328 ret = bdrv_can_set_read_only(bs, true, false, NULL);
329 if (ret < 0) {
330 goto fail;
331 }
332
333 bs->open_flags &= ~BDRV_O_RDWR;
334
335 return 0;
336
337 fail:
338 error_setg(errp, "%s", errmsg ?: "Image is read-only");
339 return -EACCES;
340 }
341
342 /*
343 * If @backing is empty, this function returns NULL without setting
344 * @errp. In all other cases, NULL will only be returned with @errp
345 * set.
346 *
347 * Therefore, a return value of NULL without @errp set means that
348 * there is no backing file; if @errp is set, there is one but its
349 * absolute filename cannot be generated.
350 */
351 char *bdrv_get_full_backing_filename_from_filename(const char *backed,
352 const char *backing,
353 Error **errp)
354 {
355 if (backing[0] == '\0') {
356 return NULL;
357 } else if (path_has_protocol(backing) || path_is_absolute(backing)) {
358 return g_strdup(backing);
359 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
360 error_setg(errp, "Cannot use relative backing file names for '%s'",
361 backed);
362 return NULL;
363 } else {
364 return path_combine(backed, backing);
365 }
366 }
367
368 /*
369 * If @filename is empty or NULL, this function returns NULL without
370 * setting @errp. In all other cases, NULL will only be returned with
371 * @errp set.
372 */
373 static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
374 const char *filename, Error **errp)
375 {
376 char *dir, *full_name;
377
378 if (!filename || filename[0] == '\0') {
379 return NULL;
380 } else if (path_has_protocol(filename) || path_is_absolute(filename)) {
381 return g_strdup(filename);
382 }
383
384 dir = bdrv_dirname(relative_to, errp);
385 if (!dir) {
386 return NULL;
387 }
388
389 full_name = g_strconcat(dir, filename, NULL);
390 g_free(dir);
391 return full_name;
392 }
393
394 char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
395 {
396 GLOBAL_STATE_CODE();
397 return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
398 }
399
400 void bdrv_register(BlockDriver *bdrv)
401 {
402 assert(bdrv->format_name);
403 GLOBAL_STATE_CODE();
404 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
405 }
406
407 BlockDriverState *bdrv_new(void)
408 {
409 BlockDriverState *bs;
410 int i;
411
412 GLOBAL_STATE_CODE();
413
414 bs = g_new0(BlockDriverState, 1);
415 QLIST_INIT(&bs->dirty_bitmaps);
416 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
417 QLIST_INIT(&bs->op_blockers[i]);
418 }
419 qemu_co_mutex_init(&bs->reqs_lock);
420 qemu_mutex_init(&bs->dirty_bitmap_mutex);
421 bs->refcnt = 1;
422 bs->aio_context = qemu_get_aio_context();
423
424 qemu_co_queue_init(&bs->flush_queue);
425
426 qemu_co_mutex_init(&bs->bsc_modify_lock);
427 bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
428
429 for (i = 0; i < bdrv_drain_all_count; i++) {
430 bdrv_drained_begin(bs);
431 }
432
433 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
434
435 return bs;
436 }
437
438 static BlockDriver *bdrv_do_find_format(const char *format_name)
439 {
440 BlockDriver *drv1;
441 GLOBAL_STATE_CODE();
442
443 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
444 if (!strcmp(drv1->format_name, format_name)) {
445 return drv1;
446 }
447 }
448
449 return NULL;
450 }
451
452 BlockDriver *bdrv_find_format(const char *format_name)
453 {
454 BlockDriver *drv1;
455 int i;
456
457 GLOBAL_STATE_CODE();
458
459 drv1 = bdrv_do_find_format(format_name);
460 if (drv1) {
461 return drv1;
462 }
463
464 /* The driver isn't registered, maybe we need to load a module */
465 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
466 if (!strcmp(block_driver_modules[i].format_name, format_name)) {
467 block_module_load_one(block_driver_modules[i].library_name);
468 break;
469 }
470 }
471
472 return bdrv_do_find_format(format_name);
473 }
474
475 static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
476 {
477 static const char *whitelist_rw[] = {
478 CONFIG_BDRV_RW_WHITELIST
479 NULL
480 };
481 static const char *whitelist_ro[] = {
482 CONFIG_BDRV_RO_WHITELIST
483 NULL
484 };
485 const char **p;
486
487 if (!whitelist_rw[0] && !whitelist_ro[0]) {
488 return 1; /* no whitelist, anything goes */
489 }
490
491 for (p = whitelist_rw; *p; p++) {
492 if (!strcmp(format_name, *p)) {
493 return 1;
494 }
495 }
496 if (read_only) {
497 for (p = whitelist_ro; *p; p++) {
498 if (!strcmp(format_name, *p)) {
499 return 1;
500 }
501 }
502 }
503 return 0;
504 }
505
506 int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
507 {
508 GLOBAL_STATE_CODE();
509 return bdrv_format_is_whitelisted(drv->format_name, read_only);
510 }
511
512 bool bdrv_uses_whitelist(void)
513 {
514 return use_bdrv_whitelist;
515 }
516
/*
 * Arguments and results exchanged between bdrv_create() and
 * bdrv_create_co_entry().
 */
typedef struct CreateCo {
    BlockDriver *drv;   /* driver whose bdrv_co_create_opts() is called */
    char *filename;     /* copy of the file name, freed by bdrv_create() */
    QemuOpts *opts;     /* creation options handed to the driver */
    int ret;            /* NOT_DONE until the coroutine stores its result */
    Error *err;         /* error propagated from the driver, if any */
} CreateCo;
524
525 static void coroutine_fn bdrv_create_co_entry(void *opaque)
526 {
527 Error *local_err = NULL;
528 int ret;
529
530 CreateCo *cco = opaque;
531 assert(cco->drv);
532 GLOBAL_STATE_CODE();
533
534 ret = cco->drv->bdrv_co_create_opts(cco->drv,
535 cco->filename, cco->opts, &local_err);
536 error_propagate(&cco->err, local_err);
537 cco->ret = ret;
538 }
539
540 int bdrv_create(BlockDriver *drv, const char* filename,
541 QemuOpts *opts, Error **errp)
542 {
543 int ret;
544
545 GLOBAL_STATE_CODE();
546
547 Coroutine *co;
548 CreateCo cco = {
549 .drv = drv,
550 .filename = g_strdup(filename),
551 .opts = opts,
552 .ret = NOT_DONE,
553 .err = NULL,
554 };
555
556 if (!drv->bdrv_co_create_opts) {
557 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
558 ret = -ENOTSUP;
559 goto out;
560 }
561
562 if (qemu_in_coroutine()) {
563 /* Fast-path if already in coroutine context */
564 bdrv_create_co_entry(&cco);
565 } else {
566 co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
567 qemu_coroutine_enter(co);
568 while (cco.ret == NOT_DONE) {
569 aio_poll(qemu_get_aio_context(), true);
570 }
571 }
572
573 ret = cco.ret;
574 if (ret < 0) {
575 if (cco.err) {
576 error_propagate(errp, cco.err);
577 } else {
578 error_setg_errno(errp, -ret, "Could not create image");
579 }
580 }
581
582 out:
583 g_free(cco.filename);
584 return ret;
585 }
586
587 /**
588 * Helper function for bdrv_create_file_fallback(): Resize @blk to at
589 * least the given @minimum_size.
590 *
591 * On success, return @blk's actual length.
592 * Otherwise, return -errno.
593 */
594 static int64_t create_file_fallback_truncate(BlockBackend *blk,
595 int64_t minimum_size, Error **errp)
596 {
597 Error *local_err = NULL;
598 int64_t size;
599 int ret;
600
601 GLOBAL_STATE_CODE();
602
603 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
604 &local_err);
605 if (ret < 0 && ret != -ENOTSUP) {
606 error_propagate(errp, local_err);
607 return ret;
608 }
609
610 size = blk_getlength(blk);
611 if (size < 0) {
612 error_free(local_err);
613 error_setg_errno(errp, -size,
614 "Failed to inquire the new image file's length");
615 return size;
616 }
617
618 if (size < minimum_size) {
619 /* Need to grow the image, but we failed to do that */
620 error_propagate(errp, local_err);
621 return -ENOTSUP;
622 }
623
624 error_free(local_err);
625 local_err = NULL;
626
627 return size;
628 }
629
630 /**
631 * Helper function for bdrv_create_file_fallback(): Zero the first
632 * sector to remove any potentially pre-existing image header.
633 */
634 static int create_file_fallback_zero_first_sector(BlockBackend *blk,
635 int64_t current_size,
636 Error **errp)
637 {
638 int64_t bytes_to_clear;
639 int ret;
640
641 GLOBAL_STATE_CODE();
642
643 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
644 if (bytes_to_clear) {
645 ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
646 if (ret < 0) {
647 error_setg_errno(errp, -ret,
648 "Failed to clear the new image's first sector");
649 return ret;
650 }
651 }
652
653 return 0;
654 }
655
656 /**
657 * Simple implementation of bdrv_co_create_opts for protocol drivers
658 * which only support creation via opening a file
659 * (usually existing raw storage device)
660 */
661 int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
662 const char *filename,
663 QemuOpts *opts,
664 Error **errp)
665 {
666 BlockBackend *blk;
667 QDict *options;
668 int64_t size = 0;
669 char *buf = NULL;
670 PreallocMode prealloc;
671 Error *local_err = NULL;
672 int ret;
673
674 GLOBAL_STATE_CODE();
675
676 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
677 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
678 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
679 PREALLOC_MODE_OFF, &local_err);
680 g_free(buf);
681 if (local_err) {
682 error_propagate(errp, local_err);
683 return -EINVAL;
684 }
685
686 if (prealloc != PREALLOC_MODE_OFF) {
687 error_setg(errp, "Unsupported preallocation mode '%s'",
688 PreallocMode_str(prealloc));
689 return -ENOTSUP;
690 }
691
692 options = qdict_new();
693 qdict_put_str(options, "driver", drv->format_name);
694
695 blk = blk_new_open(filename, NULL, options,
696 BDRV_O_RDWR | BDRV_O_RESIZE, errp);
697 if (!blk) {
698 error_prepend(errp, "Protocol driver '%s' does not support image "
699 "creation, and opening the image failed: ",
700 drv->format_name);
701 return -EINVAL;
702 }
703
704 size = create_file_fallback_truncate(blk, size, errp);
705 if (size < 0) {
706 ret = size;
707 goto out;
708 }
709
710 ret = create_file_fallback_zero_first_sector(blk, size, errp);
711 if (ret < 0) {
712 goto out;
713 }
714
715 ret = 0;
716 out:
717 blk_unref(blk);
718 return ret;
719 }
720
721 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
722 {
723 QemuOpts *protocol_opts;
724 BlockDriver *drv;
725 QDict *qdict;
726 int ret;
727
728 GLOBAL_STATE_CODE();
729
730 drv = bdrv_find_protocol(filename, true, errp);
731 if (drv == NULL) {
732 return -ENOENT;
733 }
734
735 if (!drv->create_opts) {
736 error_setg(errp, "Driver '%s' does not support image creation",
737 drv->format_name);
738 return -ENOTSUP;
739 }
740
741 /*
742 * 'opts' contains a QemuOptsList with a combination of format and protocol
743 * default values.
744 *
745 * The format properly removes its options, but the default values remain
746 * in 'opts->list'. So if the protocol has options with the same name
747 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values
748 * of the format, since for overlapping options, the format wins.
749 *
750 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take
751 * only the set options, and then convert it back to QemuOpts, using the
752 * create_opts of the protocol. So the new QemuOpts, will contain only the
753 * protocol defaults.
754 */
755 qdict = qemu_opts_to_qdict(opts, NULL);
756 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp);
757 if (protocol_opts == NULL) {
758 ret = -EINVAL;
759 goto out;
760 }
761
762 ret = bdrv_create(drv, filename, protocol_opts, errp);
763 out:
764 qemu_opts_del(protocol_opts);
765 qobject_unref(qdict);
766 return ret;
767 }
768
769 int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
770 {
771 Error *local_err = NULL;
772 int ret;
773
774 IO_CODE();
775 assert(bs != NULL);
776
777 if (!bs->drv) {
778 error_setg(errp, "Block node '%s' is not opened", bs->filename);
779 return -ENOMEDIUM;
780 }
781
782 if (!bs->drv->bdrv_co_delete_file) {
783 error_setg(errp, "Driver '%s' does not support image deletion",
784 bs->drv->format_name);
785 return -ENOTSUP;
786 }
787
788 ret = bs->drv->bdrv_co_delete_file(bs, &local_err);
789 if (ret < 0) {
790 error_propagate(errp, local_err);
791 }
792
793 return ret;
794 }
795
796 void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
797 {
798 Error *local_err = NULL;
799 int ret;
800 IO_CODE();
801
802 if (!bs) {
803 return;
804 }
805
806 ret = bdrv_co_delete_file(bs, &local_err);
807 /*
808 * ENOTSUP will happen if the block driver doesn't support
809 * the 'bdrv_co_delete_file' interface. This is a predictable
810 * scenario and shouldn't be reported back to the user.
811 */
812 if (ret == -ENOTSUP) {
813 error_free(local_err);
814 } else if (ret < 0) {
815 error_report_err(local_err);
816 }
817 }
818
819 /**
820 * Try to get @bs's logical and physical block size.
821 * On success, store them in @bsz struct and return 0.
822 * On failure return -errno.
823 * @bs must not be empty.
824 */
825 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
826 {
827 BlockDriver *drv = bs->drv;
828 BlockDriverState *filtered = bdrv_filter_bs(bs);
829 GLOBAL_STATE_CODE();
830
831 if (drv && drv->bdrv_probe_blocksizes) {
832 return drv->bdrv_probe_blocksizes(bs, bsz);
833 } else if (filtered) {
834 return bdrv_probe_blocksizes(filtered, bsz);
835 }
836
837 return -ENOTSUP;
838 }
839
840 /**
841 * Try to get @bs's geometry (cyls, heads, sectors).
842 * On success, store them in @geo struct and return 0.
843 * On failure return -errno.
844 * @bs must not be empty.
845 */
846 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
847 {
848 BlockDriver *drv = bs->drv;
849 BlockDriverState *filtered = bdrv_filter_bs(bs);
850 GLOBAL_STATE_CODE();
851
852 if (drv && drv->bdrv_probe_geometry) {
853 return drv->bdrv_probe_geometry(bs, geo);
854 } else if (filtered) {
855 return bdrv_probe_geometry(filtered, geo);
856 }
857
858 return -ENOTSUP;
859 }
860
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the new file and @size is the size of
 * that buffer in bytes.  On POSIX hosts the file is created in $TMPDIR,
 * or in /var/tmp if TMPDIR is not set.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit into @filename).
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    const char *tmpdir;
    int written;
    int fd;

    /* A non-positive size cannot hold any name and must not reach snprintf */
    if (size <= 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    /* snprintf() returns a negative value on error; treat it like overflow */
    written = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (written < 0 || written >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so keep close()'s error code */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
896
897 /*
898 * Detect host devices. By convention, /dev/cdrom[N] is always
899 * recognized as a host CDROM.
900 */
901 static BlockDriver *find_hdev_driver(const char *filename)
902 {
903 int score_max = 0, score;
904 BlockDriver *drv = NULL, *d;
905 GLOBAL_STATE_CODE();
906
907 QLIST_FOREACH(d, &bdrv_drivers, list) {
908 if (d->bdrv_probe_device) {
909 score = d->bdrv_probe_device(filename);
910 if (score > score_max) {
911 score_max = score;
912 drv = d;
913 }
914 }
915 }
916
917 return drv;
918 }
919
920 static BlockDriver *bdrv_do_find_protocol(const char *protocol)
921 {
922 BlockDriver *drv1;
923 GLOBAL_STATE_CODE();
924
925 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
926 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
927 return drv1;
928 }
929 }
930
931 return NULL;
932 }
933
934 BlockDriver *bdrv_find_protocol(const char *filename,
935 bool allow_protocol_prefix,
936 Error **errp)
937 {
938 BlockDriver *drv1;
939 char protocol[128];
940 int len;
941 const char *p;
942 int i;
943
944 GLOBAL_STATE_CODE();
945 /* TODO Drivers without bdrv_file_open must be specified explicitly */
946
947 /*
948 * XXX(hch): we really should not let host device detection
949 * override an explicit protocol specification, but moving this
950 * later breaks access to device names with colons in them.
951 * Thanks to the brain-dead persistent naming schemes on udev-
952 * based Linux systems those actually are quite common.
953 */
954 drv1 = find_hdev_driver(filename);
955 if (drv1) {
956 return drv1;
957 }
958
959 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
960 return &bdrv_file;
961 }
962
963 p = strchr(filename, ':');
964 assert(p != NULL);
965 len = p - filename;
966 if (len > sizeof(protocol) - 1)
967 len = sizeof(protocol) - 1;
968 memcpy(protocol, filename, len);
969 protocol[len] = '\0';
970
971 drv1 = bdrv_do_find_protocol(protocol);
972 if (drv1) {
973 return drv1;
974 }
975
976 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
977 if (block_driver_modules[i].protocol_name &&
978 !strcmp(block_driver_modules[i].protocol_name, protocol)) {
979 block_module_load_one(block_driver_modules[i].library_name);
980 break;
981 }
982 }
983
984 drv1 = bdrv_do_find_protocol(protocol);
985 if (!drv1) {
986 error_setg(errp, "Unknown protocol '%s'", protocol);
987 }
988 return drv1;
989 }
990
991 /*
992 * Guess image format by probing its contents.
993 * This is not a good idea when your image is raw (CVE-2008-2004), but
994 * we do it anyway for backward compatibility.
995 *
996 * @buf contains the image's first @buf_size bytes.
997 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
998 * but can be smaller if the image file is smaller)
999 * @filename is its filename.
1000 *
1001 * For all block drivers, call the bdrv_probe() method to get its
1002 * probing score.
1003 * Return the first block driver with the highest probing score.
1004 */
1005 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
1006 const char *filename)
1007 {
1008 int score_max = 0, score;
1009 BlockDriver *drv = NULL, *d;
1010 IO_CODE();
1011
1012 QLIST_FOREACH(d, &bdrv_drivers, list) {
1013 if (d->bdrv_probe) {
1014 score = d->bdrv_probe(buf, buf_size, filename);
1015 if (score > score_max) {
1016 score_max = score;
1017 drv = d;
1018 }
1019 }
1020 }
1021
1022 return drv;
1023 }
1024
1025 static int find_image_format(BlockBackend *file, const char *filename,
1026 BlockDriver **pdrv, Error **errp)
1027 {
1028 BlockDriver *drv;
1029 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
1030 int ret = 0;
1031
1032 GLOBAL_STATE_CODE();
1033
1034 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
1035 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
1036 *pdrv = &bdrv_raw;
1037 return ret;
1038 }
1039
1040 ret = blk_pread(file, 0, sizeof(buf), buf, 0);
1041 if (ret < 0) {
1042 error_setg_errno(errp, -ret, "Could not read image for determining its "
1043 "format");
1044 *pdrv = NULL;
1045 return ret;
1046 }
1047
1048 drv = bdrv_probe_all(buf, sizeof(buf), filename);
1049 if (!drv) {
1050 error_setg(errp, "Could not determine image format: No compatible "
1051 "driver found");
1052 *pdrv = NULL;
1053 return -ENOENT;
1054 }
1055
1056 *pdrv = drv;
1057 return 0;
1058 }
1059
1060 /**
1061 * Set the current 'total_sectors' value
1062 * Return 0 on success, -errno on error.
1063 */
1064 int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
1065 {
1066 BlockDriver *drv = bs->drv;
1067 IO_CODE();
1068
1069 if (!drv) {
1070 return -ENOMEDIUM;
1071 }
1072
1073 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
1074 if (bdrv_is_sg(bs))
1075 return 0;
1076
1077 /* query actual device if possible, otherwise just trust the hint */
1078 if (drv->bdrv_getlength) {
1079 int64_t length = drv->bdrv_getlength(bs);
1080 if (length < 0) {
1081 return length;
1082 }
1083 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
1084 }
1085
1086 bs->total_sectors = hint;
1087
1088 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
1089 return -EFBIG;
1090 }
1091
1092 return 0;
1093 }
1094
1095 /**
1096 * Combines a QDict of new block driver @options with any missing options taken
1097 * from @old_options, so that leaving out an option defaults to its old value.
1098 */
1099 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
1100 QDict *old_options)
1101 {
1102 GLOBAL_STATE_CODE();
1103 if (bs->drv && bs->drv->bdrv_join_options) {
1104 bs->drv->bdrv_join_options(options, old_options);
1105 } else {
1106 qdict_join(options, old_options, false);
1107 }
1108 }
1109
1110 static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
1111 int open_flags,
1112 Error **errp)
1113 {
1114 Error *local_err = NULL;
1115 char *value = qemu_opt_get_del(opts, "detect-zeroes");
1116 BlockdevDetectZeroesOptions detect_zeroes =
1117 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
1118 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
1119 GLOBAL_STATE_CODE();
1120 g_free(value);
1121 if (local_err) {
1122 error_propagate(errp, local_err);
1123 return detect_zeroes;
1124 }
1125
1126 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1127 !(open_flags & BDRV_O_UNMAP))
1128 {
1129 error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1130 "without setting discard operation to unmap");
1131 }
1132
1133 return detect_zeroes;
1134 }
1135
1136 /**
1137 * Set open flags for aio engine
1138 *
1139 * Return 0 on success, -1 if the engine specified is invalid
1140 */
1141 int bdrv_parse_aio(const char *mode, int *flags)
1142 {
1143 if (!strcmp(mode, "threads")) {
1144 /* do nothing, default */
1145 } else if (!strcmp(mode, "native")) {
1146 *flags |= BDRV_O_NATIVE_AIO;
1147 #ifdef CONFIG_LINUX_IO_URING
1148 } else if (!strcmp(mode, "io_uring")) {
1149 *flags |= BDRV_O_IO_URING;
1150 #endif
1151 } else {
1152 return -1;
1153 }
1154
1155 return 0;
1156 }
1157
1158 /**
1159 * Set open flags for a given discard mode
1160 *
1161 * Return 0 on success, -1 if the discard mode was invalid.
1162 */
1163 int bdrv_parse_discard_flags(const char *mode, int *flags)
1164 {
1165 *flags &= ~BDRV_O_UNMAP;
1166
1167 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
1168 /* do nothing */
1169 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
1170 *flags |= BDRV_O_UNMAP;
1171 } else {
1172 return -1;
1173 }
1174
1175 return 0;
1176 }
1177
1178 /**
1179 * Set open flags for a given cache mode
1180 *
1181 * Return 0 on success, -1 if the cache mode was invalid.
1182 */
1183 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
1184 {
1185 *flags &= ~BDRV_O_CACHE_MASK;
1186
1187 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
1188 *writethrough = false;
1189 *flags |= BDRV_O_NOCACHE;
1190 } else if (!strcmp(mode, "directsync")) {
1191 *writethrough = true;
1192 *flags |= BDRV_O_NOCACHE;
1193 } else if (!strcmp(mode, "writeback")) {
1194 *writethrough = false;
1195 } else if (!strcmp(mode, "unsafe")) {
1196 *writethrough = false;
1197 *flags |= BDRV_O_NO_FLUSH;
1198 } else if (!strcmp(mode, "writethrough")) {
1199 *writethrough = true;
1200 } else {
1201 return -1;
1202 }
1203
1204 return 0;
1205 }
1206
1207 static char *bdrv_child_get_parent_desc(BdrvChild *c)
1208 {
1209 BlockDriverState *parent = c->opaque;
1210 return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
1211 }
1212
1213 static void bdrv_child_cb_drained_begin(BdrvChild *child)
1214 {
1215 BlockDriverState *bs = child->opaque;
1216 bdrv_do_drained_begin_quiesce(bs, NULL, false);
1217 }
1218
1219 static bool bdrv_child_cb_drained_poll(BdrvChild *child)
1220 {
1221 BlockDriverState *bs = child->opaque;
1222 return bdrv_drain_poll(bs, false, NULL, false);
1223 }
1224
1225 static void bdrv_child_cb_drained_end(BdrvChild *child,
1226 int *drained_end_counter)
1227 {
1228 BlockDriverState *bs = child->opaque;
1229 bdrv_drained_end_no_poll(bs, drained_end_counter);
1230 }
1231
1232 static int bdrv_child_cb_inactivate(BdrvChild *child)
1233 {
1234 BlockDriverState *bs = child->opaque;
1235 GLOBAL_STATE_CODE();
1236 assert(bs->open_flags & BDRV_O_INACTIVE);
1237 return 0;
1238 }
1239
1240 static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1241 GSList **ignore, Error **errp)
1242 {
1243 BlockDriverState *bs = child->opaque;
1244 return bdrv_can_set_aio_context(bs, ctx, ignore, errp);
1245 }
1246
1247 static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1248 GSList **ignore)
1249 {
1250 BlockDriverState *bs = child->opaque;
1251 return bdrv_set_aio_context_ignore(bs, ctx, ignore);
1252 }
1253
1254 /*
1255 * Returns the options and flags that a temporary snapshot should get, based on
1256 * the originally requested flags (the originally requested image will have
1257 * flags like a backing file)
1258 */
1259 static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
1260 int parent_flags, QDict *parent_options)
1261 {
1262 GLOBAL_STATE_CODE();
1263 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
1264
1265 /* For temporary files, unconditional cache=unsafe is fine */
1266 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
1267 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
1268
1269 /* Copy the read-only and discard options from the parent */
1270 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1271 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
1272
1273 /* aio=native doesn't work for cache.direct=off, so disable it for the
1274 * temporary snapshot */
1275 *child_flags &= ~BDRV_O_NATIVE_AIO;
1276 }
1277
1278 static void bdrv_backing_attach(BdrvChild *c)
1279 {
1280 BlockDriverState *parent = c->opaque;
1281 BlockDriverState *backing_hd = c->bs;
1282
1283 GLOBAL_STATE_CODE();
1284 assert(!parent->backing_blocker);
1285 error_setg(&parent->backing_blocker,
1286 "node is used as backing hd of '%s'",
1287 bdrv_get_device_or_node_name(parent));
1288
1289 bdrv_refresh_filename(backing_hd);
1290
1291 parent->open_flags &= ~BDRV_O_NO_BACKING;
1292
1293 bdrv_op_block_all(backing_hd, parent->backing_blocker);
1294 /* Otherwise we won't be able to commit or stream */
1295 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1296 parent->backing_blocker);
1297 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
1298 parent->backing_blocker);
1299 /*
1300 * We do backup in 3 ways:
1301 * 1. drive backup
1302 * The target bs is new opened, and the source is top BDS
1303 * 2. blockdev backup
1304 * Both the source and the target are top BDSes.
1305 * 3. internal backup(used for block replication)
1306 * Both the source and the target are backing file
1307 *
1308 * In case 1 and 2, neither the source nor the target is the backing file.
1309 * In case 3, we will block the top BDS, so there is only one block job
1310 * for the top BDS and its backing chain.
1311 */
1312 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
1313 parent->backing_blocker);
1314 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
1315 parent->backing_blocker);
1316 }
1317
1318 static void bdrv_backing_detach(BdrvChild *c)
1319 {
1320 BlockDriverState *parent = c->opaque;
1321
1322 GLOBAL_STATE_CODE();
1323 assert(parent->backing_blocker);
1324 bdrv_op_unblock_all(c->bs, parent->backing_blocker);
1325 error_free(parent->backing_blocker);
1326 parent->backing_blocker = NULL;
1327 }
1328
1329 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
1330 const char *filename, Error **errp)
1331 {
1332 BlockDriverState *parent = c->opaque;
1333 bool read_only = bdrv_is_read_only(parent);
1334 int ret;
1335 GLOBAL_STATE_CODE();
1336
1337 if (read_only) {
1338 ret = bdrv_reopen_set_read_only(parent, false, errp);
1339 if (ret < 0) {
1340 return ret;
1341 }
1342 }
1343
1344 ret = bdrv_change_backing_file(parent, filename,
1345 base->drv ? base->drv->format_name : "",
1346 false);
1347 if (ret < 0) {
1348 error_setg_errno(errp, -ret, "Could not update backing file link");
1349 }
1350
1351 if (read_only) {
1352 bdrv_reopen_set_read_only(parent, true, NULL);
1353 }
1354
1355 return ret;
1356 }
1357
1358 /*
1359 * Returns the options and flags that a generic child of a BDS should
1360 * get, based on the given options and flags for the parent BDS.
1361 */
1362 static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
1363 int *child_flags, QDict *child_options,
1364 int parent_flags, QDict *parent_options)
1365 {
1366 int flags = parent_flags;
1367 GLOBAL_STATE_CODE();
1368
1369 /*
1370 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
1371 * Generally, the question to answer is: Should this child be
1372 * format-probed by default?
1373 */
1374
1375 /*
1376 * Pure and non-filtered data children of non-format nodes should
1377 * be probed by default (even when the node itself has BDRV_O_PROTOCOL
1378 * set). This only affects a very limited set of drivers (namely
1379 * quorum and blkverify when this comment was written).
1380 * Force-clear BDRV_O_PROTOCOL then.
1381 */
1382 if (!parent_is_format &&
1383 (role & BDRV_CHILD_DATA) &&
1384 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
1385 {
1386 flags &= ~BDRV_O_PROTOCOL;
1387 }
1388
1389 /*
1390 * All children of format nodes (except for COW children) and all
1391 * metadata children in general should never be format-probed.
1392 * Force-set BDRV_O_PROTOCOL then.
1393 */
1394 if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
1395 (role & BDRV_CHILD_METADATA))
1396 {
1397 flags |= BDRV_O_PROTOCOL;
1398 }
1399
1400 /*
1401 * If the cache mode isn't explicitly set, inherit direct and no-flush from
1402 * the parent.
1403 */
1404 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1405 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1406 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1407
1408 if (role & BDRV_CHILD_COW) {
1409 /* backing files are opened read-only by default */
1410 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1411 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
1412 } else {
1413 /* Inherit the read-only option from the parent if it's not set */
1414 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1415 qdict_copy_default(child_options, parent_options,
1416 BDRV_OPT_AUTO_READ_ONLY);
1417 }
1418
1419 /*
1420 * bdrv_co_pdiscard() respects unmap policy for the parent, so we
1421 * can default to enable it on lower layers regardless of the
1422 * parent option.
1423 */
1424 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
1425
1426 /* Clear flags that only apply to the top layer */
1427 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
1428
1429 if (role & BDRV_CHILD_METADATA) {
1430 flags &= ~BDRV_O_NO_IO;
1431 }
1432 if (role & BDRV_CHILD_COW) {
1433 flags &= ~BDRV_O_TEMPORARY;
1434 }
1435
1436 *child_flags = flags;
1437 }
1438
1439 static void bdrv_child_cb_attach(BdrvChild *child)
1440 {
1441 BlockDriverState *bs = child->opaque;
1442
1443 assert_bdrv_graph_writable(bs);
1444 QLIST_INSERT_HEAD(&bs->children, child, next);
1445
1446 if (child->role & BDRV_CHILD_COW) {
1447 bdrv_backing_attach(child);
1448 }
1449
1450 bdrv_apply_subtree_drain(child, bs);
1451 }
1452
1453 static void bdrv_child_cb_detach(BdrvChild *child)
1454 {
1455 BlockDriverState *bs = child->opaque;
1456
1457 if (child->role & BDRV_CHILD_COW) {
1458 bdrv_backing_detach(child);
1459 }
1460
1461 bdrv_unapply_subtree_drain(child, bs);
1462
1463 assert_bdrv_graph_writable(bs);
1464 QLIST_REMOVE(child, next);
1465 }
1466
1467 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
1468 const char *filename, Error **errp)
1469 {
1470 if (c->role & BDRV_CHILD_COW) {
1471 return bdrv_backing_update_filename(c, base, filename, errp);
1472 }
1473 return 0;
1474 }
1475
1476 AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
1477 {
1478 BlockDriverState *bs = c->opaque;
1479 IO_CODE();
1480
1481 return bdrv_get_aio_context(bs);
1482 }
1483
/*
 * Child class for BdrvChild objects whose parent is itself a block node
 * (.parent_is_bds).  Every callback installed here reads the parent
 * BlockDriverState from the child's opaque pointer.
 */
const BdrvChildClass child_of_bds = {
    .parent_is_bds = true,
    .get_parent_desc = bdrv_child_get_parent_desc,
    .inherit_options = bdrv_inherited_options,
    .drained_begin = bdrv_child_cb_drained_begin,
    .drained_poll = bdrv_child_cb_drained_poll,
    .drained_end = bdrv_child_cb_drained_end,
    .attach = bdrv_child_cb_attach,
    .detach = bdrv_child_cb_detach,
    .inactivate = bdrv_child_cb_inactivate,
    .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
    .set_aio_ctx = bdrv_child_cb_set_aio_ctx,
    .update_filename = bdrv_child_cb_update_filename,
    .get_parent_aio_context = child_of_bds_get_parent_aio_context,
};
1499
1500 AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
1501 {
1502 GLOBAL_STATE_CODE();
1503 return c->klass->get_parent_aio_context(c);
1504 }
1505
1506 static int bdrv_open_flags(BlockDriverState *bs, int flags)
1507 {
1508 int open_flags = flags;
1509 GLOBAL_STATE_CODE();
1510
1511 /*
1512 * Clear flags that are internal to the block layer before opening the
1513 * image.
1514 */
1515 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
1516
1517 return open_flags;
1518 }
1519
1520 static void update_flags_from_options(int *flags, QemuOpts *opts)
1521 {
1522 GLOBAL_STATE_CODE();
1523
1524 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
1525
1526 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
1527 *flags |= BDRV_O_NO_FLUSH;
1528 }
1529
1530 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
1531 *flags |= BDRV_O_NOCACHE;
1532 }
1533
1534 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
1535 *flags |= BDRV_O_RDWR;
1536 }
1537
1538 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
1539 *flags |= BDRV_O_AUTO_RDONLY;
1540 }
1541 }
1542
1543 static void update_options_from_flags(QDict *options, int flags)
1544 {
1545 GLOBAL_STATE_CODE();
1546 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
1547 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
1548 }
1549 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
1550 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1551 flags & BDRV_O_NO_FLUSH);
1552 }
1553 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
1554 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
1555 }
1556 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
1557 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
1558 flags & BDRV_O_AUTO_RDONLY);
1559 }
1560 }
1561
1562 static void bdrv_assign_node_name(BlockDriverState *bs,
1563 const char *node_name,
1564 Error **errp)
1565 {
1566 char *gen_node_name = NULL;
1567 GLOBAL_STATE_CODE();
1568
1569 if (!node_name) {
1570 node_name = gen_node_name = id_generate(ID_BLOCK);
1571 } else if (!id_wellformed(node_name)) {
1572 /*
1573 * Check for empty string or invalid characters, but not if it is
1574 * generated (generated names use characters not available to the user)
1575 */
1576 error_setg(errp, "Invalid node-name: '%s'", node_name);
1577 return;
1578 }
1579
1580 /* takes care of avoiding namespaces collisions */
1581 if (blk_by_name(node_name)) {
1582 error_setg(errp, "node-name=%s is conflicting with a device id",
1583 node_name);
1584 goto out;
1585 }
1586
1587 /* takes care of avoiding duplicates node names */
1588 if (bdrv_find_node(node_name)) {
1589 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
1590 goto out;
1591 }
1592
1593 /* Make sure that the node name isn't truncated */
1594 if (strlen(node_name) >= sizeof(bs->node_name)) {
1595 error_setg(errp, "Node name too long");
1596 goto out;
1597 }
1598
1599 /* copy node name into the bs and insert it into the graph list */
1600 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1601 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
1602 out:
1603 g_free(gen_node_name);
1604 }
1605
/*
 * Name @bs, bind @drv to it and run the driver's open callback.
 *
 * @bs:         node to open
 * @drv:        driver to install in bs->drv
 * @node_name:  passed to bdrv_assign_node_name() (NULL generates a name)
 * @options:    options handed to the driver's open callback
 * @open_flags: flags handed to the driver's open callback
 * @errp:       error destination
 *
 * Returns 0 on success and a negative errno value on failure.  If the
 * driver's open callback fails, bs->drv, bs->file and bs->opaque are reset
 * before returning.  The later failure paths (refreshing the sector count
 * or the limits) return without that cleanup.
 * NOTE(review): presumably the caller tears the node down in that case
 * (bdrv_new_open_driver_opts() calls bdrv_unref()) - confirm for all callers.
 */
static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
                            const char *node_name, QDict *options,
                            int open_flags, Error **errp)
{
    Error *local_err = NULL;
    int i, ret;
    GLOBAL_STATE_CODE();

    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }

    bs->drv = drv;
    /* Zero-initialized per-node driver state of drv->instance_size bytes */
    bs->opaque = g_malloc0(drv->instance_size);

    /* bdrv_file_open takes precedence; a driver with neither trivially opens */
    if (drv->bdrv_file_open) {
        assert(!drv->bdrv_needs_filename || bs->filename[0]);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else if (drv->bdrv_open) {
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    } else {
        ret = 0;
    }

    if (ret < 0) {
        /* Prefer the driver's own error; otherwise build a generic one */
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto open_failed;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return ret;
    }

    bdrv_refresh_limits(bs, NULL, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }

    /* Sanity-check the limits that were just refreshed */
    assert(bdrv_opt_mem_align(bs) != 0);
    assert(bdrv_min_mem_align(bs) != 0);
    assert(is_power_of_2(bs->bl.request_alignment));

    /* Call the driver's drain_begin hook once per outstanding quiesce */
    for (i = 0; i < bs->quiesce_counter; i++) {
        if (drv->bdrv_co_drain_begin) {
            drv->bdrv_co_drain_begin(bs);
        }
    }

    return 0;
open_failed:
    /* Undo the driver binding made above */
    bs->drv = NULL;
    if (bs->file != NULL) {
        bdrv_unref_child(bs, bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    return ret;
}
1676
1677 /*
1678 * Create and open a block node.
1679 *
1680 * @options is a QDict of options to pass to the block drivers, or NULL for an
1681 * empty set of options. The reference to the QDict belongs to the block layer
1682 * after the call (even on failure), so if the caller intends to reuse the
1683 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
1684 */
1685 BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
1686 const char *node_name,
1687 QDict *options, int flags,
1688 Error **errp)
1689 {
1690 BlockDriverState *bs;
1691 int ret;
1692
1693 GLOBAL_STATE_CODE();
1694
1695 bs = bdrv_new();
1696 bs->open_flags = flags;
1697 bs->options = options ?: qdict_new();
1698 bs->explicit_options = qdict_clone_shallow(bs->options);
1699 bs->opaque = NULL;
1700
1701 update_options_from_flags(bs->options, flags);
1702
1703 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1704 if (ret < 0) {
1705 qobject_unref(bs->explicit_options);
1706 bs->explicit_options = NULL;
1707 qobject_unref(bs->options);
1708 bs->options = NULL;
1709 bdrv_unref(bs);
1710 return NULL;
1711 }
1712
1713 return bs;
1714 }
1715
1716 /* Create and open a block node. */
1717 BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1718 int flags, Error **errp)
1719 {
1720 GLOBAL_STATE_CODE();
1721 return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
1722 }
1723
/*
 * Options understood by the generic block layer for every node.
 * bdrv_open_common() absorbs these keys from the options QDict into a
 * QemuOpts created from this list.
 */
QemuOptsList bdrv_runtime_opts = {
    .name = "bdrv_common",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
    .desc = {
        {
            .name = "node-name",
            .type = QEMU_OPT_STRING,
            .help = "Node name of the block device node",
        },
        {
            .name = "driver",
            .type = QEMU_OPT_STRING,
            .help = "Block driver to use for the node",
        },
        {
            .name = BDRV_OPT_CACHE_DIRECT,
            .type = QEMU_OPT_BOOL,
            .help = "Bypass software writeback cache on the host",
        },
        {
            .name = BDRV_OPT_CACHE_NO_FLUSH,
            .type = QEMU_OPT_BOOL,
            .help = "Ignore flush requests",
        },
        {
            .name = BDRV_OPT_READ_ONLY,
            .type = QEMU_OPT_BOOL,
            .help = "Node is opened in read-only mode",
        },
        {
            .name = BDRV_OPT_AUTO_READ_ONLY,
            .type = QEMU_OPT_BOOL,
            .help = "Node can become read-only if opening read-write fails",
        },
        {
            /* Parsed by bdrv_parse_detect_zeroes() */
            .name = "detect-zeroes",
            .type = QEMU_OPT_STRING,
            .help = "try to optimize zero writes (off, on, unmap)",
        },
        {
            /* Parsed by bdrv_parse_discard_flags() */
            .name = BDRV_OPT_DISCARD,
            .type = QEMU_OPT_STRING,
            .help = "discard operation (ignore/off, unmap/on)",
        },
        {
            .name = BDRV_OPT_FORCE_SHARE,
            .type = QEMU_OPT_BOOL,
            .help = "always accept other writers (default: off)",
        },
        { /* end of list */ }
    },
};
1776
/*
 * Minimal image creation option list: only the virtual disk size and a
 * preallocation mode (whose help text allows just "off").
 * NOTE(review): the users of this list are outside this part of the file;
 * presumably drivers without creation options of their own - confirm.
 */
QemuOptsList bdrv_create_opts_simple = {
    .name = "simple-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
        {
            .name = BLOCK_OPT_PREALLOC,
            .type = QEMU_OPT_STRING,
            .help = "Preallocation mode (allowed values: off)"
        },
        { /* end of list */ }
    }
};
1794
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 *
 * @bs:      node being opened; must not have a file child yet
 * @file:    if non-NULL, the filename is taken from blk_bs(@file);
 *           otherwise it is looked up in @options under "filename"
 * @options: options for this node; must be non-NULL and must not be
 *           bs->options itself.  The "driver" entry must name a known
 *           format (asserted).
 * @errp:    error destination
 *
 * Returns 0 on success, a negative errno value on failure.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
                            QDict *options, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *driver_name = NULL;
    const char *node_name = NULL;
    const char *discard;
    QemuOpts *opts;
    BlockDriver *drv;
    Error *local_err = NULL;
    bool ro;

    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);
    GLOBAL_STATE_CODE();

    /* Move the generic (bdrv_runtime_opts) keys out of @options */
    opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
        ret = -EINVAL;
        goto fail_opts;
    }

    update_flags_from_options(&bs->open_flags, opts);

    driver_name = qemu_opt_get(opts, "driver");
    drv = bdrv_find_format(driver_name);
    assert(drv != NULL);

    bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);

    if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
        error_setg(errp,
                   BDRV_OPT_FORCE_SHARE
                   "=on can only be used with read-only images");
        ret = -EINVAL;
        goto fail_opts;
    }

    if (file != NULL) {
        bdrv_refresh_filename(blk_bs(file));
        filename = blk_bs(file)->filename;
    } else {
        /*
         * Caution: while qdict_get_try_str() is fine, getting
         * non-string types would require more care.  When @options
         * come from -blockdev or blockdev_add, its members are typed
         * according to the QAPI schema, but when they come from
         * -drive, they're all QString.
         */
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        ret = -EINVAL;
        goto fail_opts;
    }

    trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
                           drv->format_name);

    ro = bdrv_is_read_only(bs);

    /*
     * Whitelist check.  A driver that is only whitelisted for read-only
     * use may still be accepted for a read-write request if
     * bdrv_apply_auto_read_only() succeeds.
     * NOTE(review): @ro is not refreshed after bdrv_apply_auto_read_only(),
     * so the copy-on-read check below still sees the old value - confirm
     * this is intended.
     */
    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
        if (!ro && bdrv_is_whitelisted(drv, true)) {
            ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
        } else {
            ret = -ENOTSUP;
        }
        if (ret < 0) {
            error_setg(errp,
                       !ro && bdrv_is_whitelisted(drv, true)
                       ? "Driver '%s' can only be used for read-only devices"
                       : "Driver '%s' is not whitelisted",
                       drv->format_name);
            goto fail_opts;
        }
    }

    /* bdrv_new() and bdrv_close() make it so */
    assert(qatomic_read(&bs->copy_on_read) == 0);

    if (bs->open_flags & BDRV_O_COPY_ON_READ) {
        if (!ro) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
    if (discard != NULL) {
        if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
            error_setg(errp, "Invalid discard option");
            ret = -EINVAL;
            goto fail_opts;
        }
    }

    /* Must come after the discard option: the check depends on BDRV_O_UNMAP */
    bs->detect_zeroes =
        bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail_opts;
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    /* Open the image, either directly or using a protocol */
    open_flags = bdrv_open_flags(bs, bs->open_flags);
    node_name = qemu_opt_get(opts, "node-name");

    assert(!drv->bdrv_file_open || file == NULL);
    ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
    if (ret < 0) {
        goto fail_opts;
    }

    qemu_opts_del(opts);
    return 0;

fail_opts:
    qemu_opts_del(opts);
    return ret;
}
1935
1936 static QDict *parse_json_filename(const char *filename, Error **errp)
1937 {
1938 QObject *options_obj;
1939 QDict *options;
1940 int ret;
1941 GLOBAL_STATE_CODE();
1942
1943 ret = strstart(filename, "json:", &filename);
1944 assert(ret);
1945
1946 options_obj = qobject_from_json(filename, errp);
1947 if (!options_obj) {
1948 error_prepend(errp, "Could not parse the JSON options: ");
1949 return NULL;
1950 }
1951
1952 options = qobject_to(QDict, options_obj);
1953 if (!options) {
1954 qobject_unref(options_obj);
1955 error_setg(errp, "Invalid JSON object given");
1956 return NULL;
1957 }
1958
1959 qdict_flatten(options);
1960
1961 return options;
1962 }
1963
1964 static void parse_json_protocol(QDict *options, const char **pfilename,
1965 Error **errp)
1966 {
1967 QDict *json_options;
1968 Error *local_err = NULL;
1969 GLOBAL_STATE_CODE();
1970
1971 /* Parse json: pseudo-protocol */
1972 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1973 return;
1974 }
1975
1976 json_options = parse_json_filename(*pfilename, &local_err);
1977 if (local_err) {
1978 error_propagate(errp, local_err);
1979 return;
1980 }
1981
1982 /* Options given in the filename have lower priority than options
1983 * specified directly */
1984 qdict_join(options, json_options, false);
1985 qobject_unref(json_options);
1986 *pfilename = NULL;
1987 }
1988
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
 * block driver has been specified explicitly.
 *
 * @options:  in/out options dictionary (*options must be non-NULL)
 * @filename: legacy filename, or NULL
 * @flags:    in/out open flags
 * @errp:     error destination
 *
 * Returns 0 on success; -ENOENT if the "driver" option names an unknown
 * driver; -EINVAL for every other failure.
 */
static int bdrv_fill_options(QDict **options, const char *filename,
                             int *flags, Error **errp)
{
    const char *drvname;
    bool protocol = *flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    BlockDriver *drv = NULL;
    Error *local_err = NULL;

    GLOBAL_STATE_CODE();

    /*
     * Caution: while qdict_get_try_str() is fine, getting non-string
     * types would require more care.  When @options come from
     * -blockdev or blockdev_add, its members are typed according to
     * the QAPI schema, but when they come from -drive, they're all
     * QString.
     */
    drvname = qdict_get_try_str(*options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver '%s'", drvname);
            return -ENOENT;
        }
        /* If the user has explicitly specified the driver, this choice should
         * override the BDRV_O_PROTOCOL flag */
        protocol = drv->bdrv_file_open;
    }

    if (protocol) {
        *flags |= BDRV_O_PROTOCOL;
    } else {
        *flags &= ~BDRV_O_PROTOCOL;
    }

    /* Translate cache options from flags into options */
    update_options_from_flags(*options, *flags);

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put_str(*options, "filename", filename);
            /* Only a filename that came in as the legacy argument is parsed */
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                             "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    /* See cautionary note on accessing @options above */
    filename = qdict_get_try_str(*options, "filename");

    if (!drvname && protocol) {
        if (filename) {
            drv = bdrv_find_protocol(filename, parse_filename, errp);
            if (!drv) {
                return -EINVAL;
            }

            drvname = drv->format_name;
            qdict_put_str(*options, "driver", drvname);
        } else {
            error_setg(errp, "Must specify either driver or file");
            return -EINVAL;
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Drop the raw filename once parsed, unless the driver needs it */
        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
2082
/*
 * One element of a BlockReopenQueue.
 */
typedef struct BlockReopenQueueEntry {
    /* NOTE(review): presumably set once this entry was prepared - confirm */
    bool prepared;
    /* NOTE(review): presumably set once permissions were checked - confirm */
    bool perms_checked;
    /* Reopen state; bdrv_reopen_get_flags() reads its .bs and .flags */
    BDRVReopenState state;
    /* Queue linkage */
    QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
2089
2090 /*
2091 * Return the flags that @bs will have after the reopens in @q have
2092 * successfully completed. If @q is NULL (or @bs is not contained in @q),
2093 * return the current flags.
2094 */
2095 static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
2096 {
2097 BlockReopenQueueEntry *entry;
2098
2099 if (q != NULL) {
2100 QTAILQ_FOREACH(entry, q, entry) {
2101 if (entry->state.bs == bs) {
2102 return entry->state.flags;
2103 }
2104 }
2105 }
2106
2107 return bs->open_flags;
2108 }
2109
2110 /* Returns whether the image file can be written to after the reopen queue @q
2111 * has been successfully applied, or right now if @q is NULL. */
2112 static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
2113 BlockReopenQueue *q)
2114 {
2115 int flags = bdrv_reopen_get_flags(q, bs);
2116
2117 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
2118 }
2119
2120 /*
2121 * Return whether the BDS can be written to. This is not necessarily
2122 * the same as !bdrv_is_read_only(bs), as inactivated images may not
2123 * be written to but do not count as read-only images.
2124 */
2125 bool bdrv_is_writable(BlockDriverState *bs)
2126 {
2127 IO_CODE();
2128 return bdrv_is_writable_after_reopen(bs, NULL);
2129 }
2130
2131 static char *bdrv_child_user_desc(BdrvChild *c)
2132 {
2133 GLOBAL_STATE_CODE();
2134 return c->klass->get_parent_desc(c);
2135 }
2136
2137 /*
2138 * Check that @a allows everything that @b needs. @a and @b must reference same
2139 * child node.
2140 */
2141 static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
2142 {
2143 const char *child_bs_name;
2144 g_autofree char *a_user = NULL;
2145 g_autofree char *b_user = NULL;
2146 g_autofree char *perms = NULL;
2147
2148 assert(a->bs);
2149 assert(a->bs == b->bs);
2150 GLOBAL_STATE_CODE();
2151
2152 if ((b->perm & a->shared_perm) == b->perm) {
2153 return true;
2154 }
2155
2156 child_bs_name = bdrv_get_node_name(b->bs);
2157 a_user = bdrv_child_user_desc(a);
2158 b_user = bdrv_child_user_desc(b);
2159 perms = bdrv_perm_names(b->perm & ~a->shared_perm);
2160
2161 error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
2162 "both required by %s (uses node '%s' as '%s' child) and "
2163 "unshared by %s (uses node '%s' as '%s' child).",
2164 child_bs_name, perms,
2165 b_user, child_bs_name, b->name,
2166 a_user, child_bs_name, a->name);
2167
2168 return false;
2169 }
2170
2171 static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
2172 {
2173 BdrvChild *a, *b;
2174 GLOBAL_STATE_CODE();
2175
2176 /*
2177 * During the loop we'll look at each pair twice. That's correct because
2178 * bdrv_a_allow_b() is asymmetric and we should check each pair in both
2179 * directions.
2180 */
2181 QLIST_FOREACH(a, &bs->parents, next_parent) {
2182 QLIST_FOREACH(b, &bs->parents, next_parent) {
2183 if (a == b) {
2184 continue;
2185 }
2186
2187 if (!bdrv_a_allow_b(a, b, errp)) {
2188 return true;
2189 }
2190 }
2191 }
2192
2193 return false;
2194 }
2195
2196 static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
2197 BdrvChild *c, BdrvChildRole role,
2198 BlockReopenQueue *reopen_queue,
2199 uint64_t parent_perm, uint64_t parent_shared,
2200 uint64_t *nperm, uint64_t *nshared)
2201 {
2202 assert(bs->drv && bs->drv->bdrv_child_perm);
2203 GLOBAL_STATE_CODE();
2204 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
2205 parent_perm, parent_shared,
2206 nperm, nshared);
2207 /* TODO Take force_share from reopen_queue */
2208 if (child_bs && child_bs->force_share) {
2209 *nshared = BLK_PERM_ALL;
2210 }
2211 }
2212
2213 /*
2214 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for
2215 * nodes that are already in the @list, of course) so that final list is
2216 * topologically sorted. Return the result (GSList @list object is updated, so
2217 * don't use old reference after function call).
2218 *
2219 * On function start @list must be already topologically sorted and for any node
2220 * in the @list the whole subtree of the node must be in the @list as well. The
2221 * simplest way to satisfy this criteria: use only result of
2222 * bdrv_topological_dfs() or NULL as @list parameter.
2223 */
2224 static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
2225 BlockDriverState *bs)
2226 {
2227 BdrvChild *child;
2228 g_autoptr(GHashTable) local_found = NULL;
2229
2230 GLOBAL_STATE_CODE();
2231
2232 if (!found) {
2233 assert(!list);
2234 found = local_found = g_hash_table_new(NULL, NULL);
2235 }
2236
2237 if (g_hash_table_contains(found, bs)) {
2238 return list;
2239 }
2240 g_hash_table_add(found, bs);
2241
2242 QLIST_FOREACH(child, &bs->children, next) {
2243 list = bdrv_topological_dfs(list, found, child->bs);
2244 }
2245
2246 return g_slist_prepend(list, bs);
2247 }
2248
/*
 * Transaction state recorded by bdrv_child_set_perm() so that
 * bdrv_child_set_perm_abort() can restore the previous permissions.
 */
typedef struct BdrvChildSetPermState {
    BdrvChild *child;           /* child whose permissions were changed */
    uint64_t old_perm;          /* child->perm before the change */
    uint64_t old_shared_perm;   /* child->shared_perm before the change */
} BdrvChildSetPermState;
2254
2255 static void bdrv_child_set_perm_abort(void *opaque)
2256 {
2257 BdrvChildSetPermState *s = opaque;
2258
2259 GLOBAL_STATE_CODE();
2260
2261 s->child->perm = s->old_perm;
2262 s->child->shared_perm = s->old_shared_perm;
2263 }
2264
2265 static TransactionActionDrv bdrv_child_set_pem_drv = {
2266 .abort = bdrv_child_set_perm_abort,
2267 .clean = g_free,
2268 };
2269
2270 static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
2271 uint64_t shared, Transaction *tran)
2272 {
2273 BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
2274 GLOBAL_STATE_CODE();
2275
2276 *s = (BdrvChildSetPermState) {
2277 .child = c,
2278 .old_perm = c->perm,
2279 .old_shared_perm = c->shared_perm,
2280 };
2281
2282 c->perm = perm;
2283 c->shared_perm = shared;
2284
2285 tran_add(tran, &bdrv_child_set_pem_drv, s);
2286 }
2287
2288 static void bdrv_drv_set_perm_commit(void *opaque)
2289 {
2290 BlockDriverState *bs = opaque;
2291 uint64_t cumulative_perms, cumulative_shared_perms;
2292 GLOBAL_STATE_CODE();
2293
2294 if (bs->drv->bdrv_set_perm) {
2295 bdrv_get_cumulative_perm(bs, &cumulative_perms,
2296 &cumulative_shared_perms);
2297 bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
2298 }
2299 }
2300
2301 static void bdrv_drv_set_perm_abort(void *opaque)
2302 {
2303 BlockDriverState *bs = opaque;
2304 GLOBAL_STATE_CODE();
2305
2306 if (bs->drv->bdrv_abort_perm_update) {
2307 bs->drv->bdrv_abort_perm_update(bs);
2308 }
2309 }
2310
2311 TransactionActionDrv bdrv_drv_set_perm_drv = {
2312 .abort = bdrv_drv_set_perm_abort,
2313 .commit = bdrv_drv_set_perm_commit,
2314 };
2315
2316 static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
2317 uint64_t shared_perm, Transaction *tran,
2318 Error **errp)
2319 {
2320 GLOBAL_STATE_CODE();
2321 if (!bs->drv) {
2322 return 0;
2323 }
2324
2325 if (bs->drv->bdrv_check_perm) {
2326 int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
2327 if (ret < 0) {
2328 return ret;
2329 }
2330 }
2331
2332 if (tran) {
2333 tran_add(tran, &bdrv_drv_set_perm_drv, bs);
2334 }
2335
2336 return 0;
2337 }
2338
/*
 * State recorded by bdrv_replace_child_tran() for its commit and abort
 * handlers.
 */
typedef struct BdrvReplaceChildState {
    /* The BdrvChild whose .bs is being replaced (*childp at call time) */
    BdrvChild *child;
    /* Caller's BdrvChild pointer; only set when new_bs == NULL, else NULL */
    BdrvChild **childp;
    /* Node that was attached to the child before the replacement */
    BlockDriverState *old_bs;
    /* Whether the commit handler frees the child if it ends up empty */
    bool free_empty_child;
} BdrvReplaceChildState;
2345
2346 static void bdrv_replace_child_commit(void *opaque)
2347 {
2348 BdrvReplaceChildState *s = opaque;
2349 GLOBAL_STATE_CODE();
2350
2351 if (s->free_empty_child && !s->child->bs) {
2352 bdrv_child_free(s->child);
2353 }
2354 bdrv_unref(s->old_bs);
2355 }
2356
/*
 * Transaction .abort handler for bdrv_replace_child_tran(): reattach
 * s->old_bs to s->child, restore the caller's BdrvChild pointer if it had
 * been cleared, and release the node that had been put in place of old_bs.
 */
static void bdrv_replace_child_abort(void *opaque)
{
    BdrvReplaceChildState *s = opaque;
    /* Node currently attached to the child; NULL if the child was emptied */
    BlockDriverState *new_bs = s->child->bs;

    GLOBAL_STATE_CODE();
    /*
     * old_bs reference is transparently moved from @s to s->child.
     *
     * Pass &s->child here instead of s->childp, because:
     * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not
     *     modify the BdrvChild * pointer we indirectly pass to it, i.e. it
     *     will not modify s->child.  From that perspective, it does not matter
     *     whether we pass s->childp or &s->child.
     * (2) If new_bs is not NULL, s->childp will be NULL.  We then cannot use
     *     it here.
     * (3) If new_bs is NULL, *s->childp will have been NULLed by
     *     bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
     *     must not pass a NULL *s->childp here.
     *
     * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
     * any case, there is no reason to pass it anyway.
     */
    bdrv_replace_child_noperm(&s->child, s->old_bs, true);
    /*
     * The child was pre-existing, so s->old_bs must be non-NULL, and
     * s->child thus must not have been freed
     */
    assert(s->child != NULL);
    if (!new_bs) {
        /* As described above, *s->childp was cleared, so restore it */
        assert(s->childp != NULL);
        *s->childp = s->child;
    }
    /* Counterpart of the bdrv_ref(new_bs) in bdrv_replace_child_tran() */
    bdrv_unref(new_bs);
}
2393
2394 static TransactionActionDrv bdrv_replace_child_drv = {
2395 .commit = bdrv_replace_child_commit,
2396 .abort = bdrv_replace_child_abort,
2397 .clean = g_free,
2398 };
2399
2400 /*
2401 * bdrv_replace_child_tran
2402 *
2403 * Note: real unref of old_bs is done only on commit.
2404 *
2405 * The function doesn't update permissions, caller is responsible for this.
2406 *
2407 * (*childp)->bs must not be NULL.
2408 *
2409 * Note that if new_bs == NULL, @childp is stored in a state object attached
2410 * to @tran, so that the old child can be reinstated in the abort handler.
2411 * Therefore, if @new_bs can be NULL, @childp must stay valid until the
2412 * transaction is committed or aborted.
2413 *
2414 * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
2415 * freed (on commit). @free_empty_child should only be false if the
2416 * caller will free the BDrvChild themselves (which may be important
2417 * if this is in turn called in another transactional context).
2418 */
2419 static void bdrv_replace_child_tran(BdrvChild **childp,
2420 BlockDriverState *new_bs,
2421 Transaction *tran,
2422 bool free_empty_child)
2423 {
2424 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
2425 *s = (BdrvReplaceChildState) {
2426 .child = *childp,
2427 .childp = new_bs == NULL ? childp : NULL,
2428 .old_bs = (*childp)->bs,
2429 .free_empty_child = free_empty_child,
2430 };
2431 tran_add(tran, &bdrv_replace_child_drv, s);
2432
2433 /* The abort handler relies on this */
2434 assert(s->old_bs != NULL);
2435
2436 if (new_bs) {
2437 bdrv_ref(new_bs);
2438 }
2439 /*
2440 * Pass free_empty_child=false, we will free the child (if
2441 * necessary) in bdrv_replace_child_commit() (if our
2442 * @free_empty_child parameter was true).
2443 */
2444 bdrv_replace_child_noperm(childp, new_bs, false);
2445 /* old_bs reference is transparently moved from *childp to @s */
2446 }
2447
2448 /*
2449 * Refresh permissions in @bs subtree. The function is intended to be called
2450 * after some graph modification that was done without permission update.
2451 */
2452 static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
2453 Transaction *tran, Error **errp)
2454 {
2455 BlockDriver *drv = bs->drv;
2456 BdrvChild *c;
2457 int ret;
2458 uint64_t cumulative_perms, cumulative_shared_perms;
2459 GLOBAL_STATE_CODE();
2460
2461 bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
2462
2463 /* Write permissions never work with read-only images */
2464 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2465 !bdrv_is_writable_after_reopen(bs, q))
2466 {
2467 if (!bdrv_is_writable_after_reopen(bs, NULL)) {
2468 error_setg(errp, "Block node is read-only");
2469 } else {
2470 error_setg(errp, "Read-only block node '%s' cannot support "
2471 "read-write users", bdrv_get_node_name(bs));
2472 }
2473
2474 return -EPERM;
2475 }
2476
2477 /*
2478 * Unaligned requests will automatically be aligned to bl.request_alignment
2479 * and without RESIZE we can't extend requests to write to space beyond the
2480 * end of the image, so it's required that the image size is aligned.
2481 */
2482 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2483 !(cumulative_perms & BLK_PERM_RESIZE))
2484 {
2485 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
2486 error_setg(errp, "Cannot get 'write' permission without 'resize': "
2487 "Image size is not a multiple of request "
2488 "alignment");
2489 return -EPERM;
2490 }
2491 }
2492
2493 /* Check this node */
2494 if (!drv) {
2495 return 0;
2496 }
2497
2498 ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
2499 errp);
2500 if (ret < 0) {
2501 return ret;
2502 }
2503
2504 /* Drivers that never have children can omit .bdrv_child_perm() */
2505 if (!drv->bdrv_child_perm) {
2506 assert(QLIST_EMPTY(&bs->children));
2507 return 0;
2508 }
2509
2510 /* Check all children */
2511 QLIST_FOREACH(c, &bs->children, next) {
2512 uint64_t cur_perm, cur_shared;
2513
2514 bdrv_child_perm(bs, c->bs, c, c->role, q,
2515 cumulative_perms, cumulative_shared_perms,
2516 &cur_perm, &cur_shared);
2517 bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
2518 }
2519
2520 return 0;
2521 }
2522
2523 static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
2524 Transaction *tran, Error **errp)
2525 {
2526 int ret;
2527 BlockDriverState *bs;
2528 GLOBAL_STATE_CODE();
2529
2530 for ( ; list; list = list->next) {
2531 bs = list->data;
2532
2533 if (bdrv_parent_perms_conflict(bs, errp)) {
2534 return -EINVAL;
2535 }
2536
2537 ret = bdrv_node_refresh_perm(bs, q, tran, errp);
2538 if (ret < 0) {
2539 return ret;
2540 }
2541 }
2542
2543 return 0;
2544 }
2545
2546 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
2547 uint64_t *shared_perm)
2548 {
2549 BdrvChild *c;
2550 uint64_t cumulative_perms = 0;
2551 uint64_t cumulative_shared_perms = BLK_PERM_ALL;
2552
2553 GLOBAL_STATE_CODE();
2554
2555 QLIST_FOREACH(c, &bs->parents, next_parent) {
2556 cumulative_perms |= c->perm;
2557 cumulative_shared_perms &= c->shared_perm;
2558 }
2559
2560 *perm = cumulative_perms;
2561 *shared_perm = cumulative_shared_perms;
2562 }
2563
2564 char *bdrv_perm_names(uint64_t perm)
2565 {
2566 struct perm_name {
2567 uint64_t perm;
2568 const char *name;
2569 } permissions[] = {
2570 { BLK_PERM_CONSISTENT_READ, "consistent read" },
2571 { BLK_PERM_WRITE, "write" },
2572 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
2573 { BLK_PERM_RESIZE, "resize" },
2574 { 0, NULL }
2575 };
2576
2577 GString *result = g_string_sized_new(30);
2578 struct perm_name *p;
2579
2580 for (p = permissions; p->name; p++) {
2581 if (perm & p->perm) {
2582 if (result->len > 0) {
2583 g_string_append(result, ", ");
2584 }
2585 g_string_append(result, p->name);
2586 }
2587 }
2588
2589 return g_string_free(result, FALSE);
2590 }
2591
2592
2593 static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
2594 {
2595 int ret;
2596 Transaction *tran = tran_new();
2597 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
2598 GLOBAL_STATE_CODE();
2599
2600 ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
2601 tran_finalize(tran, ret);
2602
2603 return ret;
2604 }
2605
2606 int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
2607 Error **errp)
2608 {
2609 Error *local_err = NULL;
2610 Transaction *tran = tran_new();
2611 int ret;
2612
2613 GLOBAL_STATE_CODE();
2614
2615 bdrv_child_set_perm(c, perm, shared, tran);
2616
2617 ret = bdrv_refresh_perms(c->bs, &local_err);
2618
2619 tran_finalize(tran, ret);
2620
2621 if (ret < 0) {
2622 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
2623 /* tighten permissions */
2624 error_propagate(errp, local_err);
2625 } else {
2626 /*
2627 * Our caller may intend to only loosen restrictions and
2628 * does not expect this function to fail. Errors are not
2629 * fatal in such a case, so we can just hide them from our
2630 * caller.
2631 */
2632 error_free(local_err);
2633 ret = 0;
2634 }
2635 }
2636
2637 return ret;
2638 }
2639
2640 int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
2641 {
2642 uint64_t parent_perms, parent_shared;
2643 uint64_t perms, shared;
2644
2645 GLOBAL_STATE_CODE();
2646
2647 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
2648 bdrv_child_perm(bs, c->bs, c, c->role, NULL,
2649 parent_perms, parent_shared, &perms, &shared);
2650
2651 return bdrv_child_try_set_perm(c, perms, shared, errp);
2652 }
2653
2654 /*
2655 * Default implementation for .bdrv_child_perm() for block filters:
2656 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
2657 * filtered child.
2658 */
2659 static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
2660 BdrvChildRole role,
2661 BlockReopenQueue *reopen_queue,
2662 uint64_t perm, uint64_t shared,
2663 uint64_t *nperm, uint64_t *nshared)
2664 {
2665 GLOBAL_STATE_CODE();
2666 *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
2667 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
2668 }
2669
2670 static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
2671 BdrvChildRole role,
2672 BlockReopenQueue *reopen_queue,
2673 uint64_t perm, uint64_t shared,
2674 uint64_t *nperm, uint64_t *nshared)
2675 {
2676 assert(role & BDRV_CHILD_COW);
2677 GLOBAL_STATE_CODE();
2678
2679 /*
2680 * We want consistent read from backing files if the parent needs it.
2681 * No other operations are performed on backing files.
2682 */
2683 perm &= BLK_PERM_CONSISTENT_READ;
2684
2685 /*
2686 * If the parent can deal with changing data, we're okay with a
2687 * writable and resizable backing file.
2688 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
2689 */
2690 if (shared & BLK_PERM_WRITE) {
2691 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
2692 } else {
2693 shared = 0;
2694 }
2695
2696 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
2697
2698 if (bs->open_flags & BDRV_O_INACTIVE) {
2699 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2700 }
2701
2702 *nperm = perm;
2703 *nshared = shared;
2704 }
2705
2706 static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
2707 BdrvChildRole role,
2708 BlockReopenQueue *reopen_queue,
2709 uint64_t perm, uint64_t shared,
2710 uint64_t *nperm, uint64_t *nshared)
2711 {
2712 int flags;
2713
2714 GLOBAL_STATE_CODE();
2715 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
2716
2717 flags = bdrv_reopen_get_flags(reopen_queue, bs);
2718
2719 /*
2720 * Apart from the modifications below, the same permissions are
2721 * forwarded and left alone as for filters
2722 */
2723 bdrv_filter_default_perms(bs, c, role, reopen_queue,
2724 perm, shared, &perm, &shared);
2725
2726 if (role & BDRV_CHILD_METADATA) {
2727 /* Format drivers may touch metadata even if the guest doesn't write */
2728 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
2729 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2730 }
2731
2732 /*
2733 * bs->file always needs to be consistent because of the
2734 * metadata. We can never allow other users to resize or write
2735 * to it.
2736 */
2737 if (!(flags & BDRV_O_NO_IO)) {
2738 perm |= BLK_PERM_CONSISTENT_READ;
2739 }
2740 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
2741 }
2742
2743 if (role & BDRV_CHILD_DATA) {
2744 /*
2745 * Technically, everything in this block is a subset of the
2746 * BDRV_CHILD_METADATA path taken above, and so this could
2747 * be an "else if" branch. However, that is not obvious, and
2748 * this function is not performance critical, therefore we let
2749 * this be an independent "if".
2750 */
2751
2752 /*
2753 * We cannot allow other users to resize the file because the
2754 * format driver might have some assumptions about the size
2755 * (e.g. because it is stored in metadata, or because the file
2756 * is split into fixed-size data files).
2757 */
2758 shared &= ~BLK_PERM_RESIZE;
2759
2760 /*
2761 * WRITE_UNCHANGED often cannot be performed as such on the
2762 * data file. For example, the qcow2 driver may still need to
2763 * write copied clusters on copy-on-read.
2764 */
2765 if (perm & BLK_PERM_WRITE_UNCHANGED) {
2766 perm |= BLK_PERM_WRITE;
2767 }
2768
2769 /*
2770 * If the data file is written to, the format driver may
2771 * expect to be able to resize it by writing beyond the EOF.
2772 */
2773 if (perm & BLK_PERM_WRITE) {
2774 perm |= BLK_PERM_RESIZE;
2775 }
2776 }
2777
2778 if (bs->open_flags & BDRV_O_INACTIVE) {
2779 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2780 }
2781
2782 *nperm = perm;
2783 *nshared = shared;
2784 }
2785
2786 void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
2787 BdrvChildRole role, BlockReopenQueue *reopen_queue,
2788 uint64_t perm, uint64_t shared,
2789 uint64_t *nperm, uint64_t *nshared)
2790 {
2791 GLOBAL_STATE_CODE();
2792 if (role & BDRV_CHILD_FILTERED) {
2793 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
2794 BDRV_CHILD_COW)));
2795 bdrv_filter_default_perms(bs, c, role, reopen_queue,
2796 perm, shared, nperm, nshared);
2797 } else if (role & BDRV_CHILD_COW) {
2798 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
2799 bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
2800 perm, shared, nperm, nshared);
2801 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
2802 bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
2803 perm, shared, nperm, nshared);
2804 } else {
2805 g_assert_not_reached();
2806 }
2807 }
2808
2809 uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
2810 {
2811 static const uint64_t permissions[] = {
2812 [BLOCK_PERMISSION_CONSISTENT_READ] = BLK_PERM_CONSISTENT_READ,
2813 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE,
2814 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED,
2815 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE,
2816 };
2817
2818 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
2819 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);
2820
2821 assert(qapi_perm < BLOCK_PERMISSION__MAX);
2822
2823 return permissions[qapi_perm];
2824 }
2825
/**
 * Replace (*childp)->bs by @new_bs.
 *
 * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents
 * generally cannot handle a BdrvChild with .bs == NULL, so clearing
 * BdrvChild.bs should generally immediately be followed by the
 * BdrvChild pointer being cleared as well.
 *
 * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
 * freed.  @free_empty_child should only be false if the caller will
 * free the BdrvChild themselves (this may be important in a
 * transactional context, where it may only be freed on commit).
 *
 * The child must not be frozen and @new_bs must differ from the node
 * currently attached. If both the old and the new node are non-NULL, they
 * must be in the same AioContext. Neither references nor permissions are
 * touched here.
 */
static void bdrv_replace_child_noperm(BdrvChild **childp,
                                      BlockDriverState *new_bs,
                                      bool free_empty_child)
{
    BdrvChild *child = *childp;
    BlockDriverState *old_bs = child->bs;
    int new_bs_quiesce_counter;
    /*
     * Number of parent drained sections still to begin (if positive) or to
     * end (if negative) so that the parent matches the new node's drain
     * state.
     */
    int drain_saldo;

    assert(!child->frozen);
    assert(old_bs != new_bs);
    GLOBAL_STATE_CODE();

    if (old_bs && new_bs) {
        assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
    }

    /* An absent new node counts as not drained at all */
    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
    drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;

    /*
     * If the new child node is drained but the old one was not, flush
     * all outstanding requests to the old child node.
     */
    while (drain_saldo > 0 && child->klass->drained_begin) {
        bdrv_parent_drained_begin_single(child, true);
        drain_saldo--;
    }

    if (old_bs) {
        /* Detach first so that the recursive drain sections coming from @child
         * are already gone and we only end the drain sections that came from
         * elsewhere. */
        if (child->klass->detach) {
            child->klass->detach(child);
        }
        assert_bdrv_graph_writable(old_bs);
        /* Unlink the child from old_bs's list of parents */
        QLIST_REMOVE(child, next_parent);
    }

    child->bs = new_bs;
    if (!new_bs) {
        /* Clear the caller's pointer too, as described in the header */
        *childp = NULL;
    }

    if (new_bs) {
        assert_bdrv_graph_writable(new_bs);
        QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);

        /*
         * Detaching the old node may have led to the new node's
         * quiesce_counter having been decreased.  Not a problem, we
         * just need to recognize this here and then invoke
         * drained_end appropriately more often.
         */
        assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
        drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;

        /* Attach only after starting new drained sections, so that recursive
         * drain sections coming from @child don't get an extra .drained_begin
         * callback. */
        if (child->klass->attach) {
            child->klass->attach(child);
        }
    }

    /*
     * If the old child node was drained but the new one is not, allow
     * requests to come in only after the new node has been attached.
     */
    while (drain_saldo < 0 && child->klass->drained_end) {
        bdrv_parent_drained_end_single(child);
        drain_saldo++;
    }

    /* Uses the local copy: *childp has already been cleared in this case */
    if (free_empty_child && !child->bs) {
        bdrv_child_free(child);
    }
}
2918
2919 /**
2920 * Free the given @child.
2921 *
2922 * The child must be empty (i.e. `child->bs == NULL`) and it must be
2923 * unused (i.e. not in a children list).
2924 */
2925 static void bdrv_child_free(BdrvChild *child)
2926 {
2927 assert(!child->bs);
2928 GLOBAL_STATE_CODE();
2929 assert(!child->next.le_prev); /* not in children list */
2930
2931 g_free(child->name);
2932 g_free(child);
2933 }
2934
/*
 * State recorded by bdrv_attach_child_common() so that
 * bdrv_attach_child_common_abort() can undo the attachment.
 */
typedef struct BdrvAttachChildCommonState {
    /* Caller's variable through which the new child was returned */
    BdrvChild **child;
    /* AioContext of the parent before the attachment */
    AioContext *old_parent_ctx;
    /* AioContext of the child node before the attachment */
    AioContext *old_child_ctx;
} BdrvAttachChildCommonState;
2940
/*
 * Transaction .abort handler for bdrv_attach_child_common(): detach the
 * newly attached child again (which also clears the caller's pointer), move
 * the child node and the parent back to the AioContexts they were in before,
 * drop the node reference taken on attach and free the BdrvChild.
 */
static void bdrv_attach_child_common_abort(void *opaque)
{
    BdrvAttachChildCommonState *s = opaque;
    /* Take copies now: *s->child is cleared by the detach below */
    BdrvChild *child = *s->child;
    BlockDriverState *bs = child->bs;

    GLOBAL_STATE_CODE();
    /*
     * Pass free_empty_child=false, because we still need the child
     * for the AioContext operations on the parent below; those
     * BdrvChildClass methods all work on a BdrvChild object, so we
     * need to keep it as an empty shell (after this function, it will
     * not be attached to any parent, and it will not have a .bs).
     */
    bdrv_replace_child_noperm(s->child, NULL, false);

    /* Move the node back if attaching had changed its AioContext */
    if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
        bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
    }

    /* Likewise for the parent, through the BdrvChildClass callbacks */
    if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
        GSList *ignore;

        /* No need to ignore `child`, because it has been detached already */
        ignore = NULL;
        child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
                                      &error_abort);
        g_slist_free(ignore);

        ignore = NULL;
        child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
        g_slist_free(ignore);
    }

    /* Counterpart of the bdrv_ref() in bdrv_attach_child_common() */
    bdrv_unref(bs);
    bdrv_child_free(child);
}
2978
2979 static TransactionActionDrv bdrv_attach_child_common_drv = {
2980 .abort = bdrv_attach_child_common_abort,
2981 .clean = g_free,
2982 };
2983
/*
 * Common part of attaching bdrv child to bs or to blk or to job
 *
 * Resulting new child is returned through @child.
 * At start *@child must be NULL.
 * @child is saved to a new entry of @tran, so that *@child could be reverted to
 * NULL on abort(). So referenced variable must live at least until transaction
 * end.
 *
 * A new reference to @child_bs is taken for the child. If parent and child
 * are in different AioContexts, one of them is moved (see below).
 *
 * Function doesn't update permissions, caller is responsible for this.
 *
 * Returns 0 on success. On failure a negative value is returned, @errp is
 * set, nothing is added to @tran and *@child stays NULL.
 */
static int bdrv_attach_child_common(BlockDriverState *child_bs,
                                    const char *child_name,
                                    const BdrvChildClass *child_class,
                                    BdrvChildRole child_role,
                                    uint64_t perm, uint64_t shared_perm,
                                    void *opaque, BdrvChild **child,
                                    Transaction *tran, Error **errp)
{
    BdrvChild *new_child;
    AioContext *parent_ctx;
    /* Remembered up front so that the abort handler can restore it */
    AioContext *child_ctx = bdrv_get_aio_context(child_bs);

    assert(child);
    assert(*child == NULL);
    assert(child_class->get_parent_desc);
    GLOBAL_STATE_CODE();

    /* Start with an empty shell; the node is attached further down */
    new_child = g_new(BdrvChild, 1);
    *new_child = (BdrvChild) {
        .bs             = NULL,
        .name           = g_strdup(child_name),
        .klass          = child_class,
        .role           = child_role,
        .perm           = perm,
        .shared_perm    = shared_perm,
        .opaque         = opaque,
    };

    /*
     * If the AioContexts don't match, first try to move the subtree of
     * child_bs into the AioContext of the new parent. If this doesn't work,
     * try moving the parent into the AioContext of child_bs instead.
     */
    parent_ctx = bdrv_child_get_parent_aio_context(new_child);
    if (child_ctx != parent_ctx) {
        Error *local_err = NULL;
        int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err);

        if (ret < 0 && child_class->can_set_aio_ctx) {
            GSList *ignore = g_slist_prepend(NULL, new_child);
            /* Dry run first (errors discarded), then the actual move */
            if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore,
                                             NULL))
            {
                /* The parent can move instead, so drop the first error */
                error_free(local_err);
                ret = 0;
                g_slist_free(ignore);
                ignore = g_slist_prepend(NULL, new_child);
                child_class->set_aio_ctx(new_child, child_ctx, &ignore);
            }
            g_slist_free(ignore);
        }

        if (ret < 0) {
            /* Neither direction worked: report the error of the first try */
            error_propagate(errp, local_err);
            bdrv_child_free(new_child);
            return ret;
        }
    }

    /* The child holds its own reference to the node */
    bdrv_ref(child_bs);
    bdrv_replace_child_noperm(&new_child, child_bs, true);
    /* child_bs was non-NULL, so new_child must not have been freed */
    assert(new_child != NULL);

    *child = new_child;

    /* Record what is needed to undo all of the above on abort */
    BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
    *s = (BdrvAttachChildCommonState) {
        .child = child,
        .old_parent_ctx = parent_ctx,
        .old_child_ctx = child_ctx,
    };
    tran_add(tran, &bdrv_attach_child_common_drv, s);

    return 0;
}
3071
3072 /*
3073 * Variable referenced by @child must live at least until transaction end.
3074 * (see bdrv_attach_child_common() doc for details)
3075 *
3076 * Function doesn't update permissions, caller is responsible for this.
3077 */
3078 static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
3079 BlockDriverState *child_bs,
3080 const char *child_name,
3081 const BdrvChildClass *child_class,
3082 BdrvChildRole child_role,
3083 BdrvChild **child,
3084 Transaction *tran,
3085 Error **errp)
3086 {
3087 int ret;
3088 uint64_t perm, shared_perm;
3089
3090 assert(parent_bs->drv);
3091 GLOBAL_STATE_CODE();
3092
3093 if (bdrv_recurse_has_child(child_bs, parent_bs)) {
3094 error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
3095 child_bs->node_name, child_name, parent_bs->node_name);
3096 return -EINVAL;
3097 }
3098
3099 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
3100 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
3101 perm, shared_perm, &perm, &shared_perm);
3102
3103 ret = bdrv_attach_child_common(child_bs, child_name, child_class,
3104 child_role, perm, shared_perm, parent_bs,
3105 child, tran, errp);
3106 if (ret < 0) {
3107 return ret;
3108 }
3109
3110 return 0;
3111 }
3112
3113 static void bdrv_detach_child(BdrvChild **childp)
3114 {
3115 BlockDriverState *old_bs = (*childp)->bs;
3116
3117 GLOBAL_STATE_CODE();
3118 bdrv_replace_child_noperm(childp, NULL, true);
3119
3120 if (old_bs) {
3121 /*
3122 * Update permissions for old node. We're just taking a parent away, so
3123 * we're loosening restrictions. Errors of permission update are not
3124 * fatal in this case, ignore them.
3125 */
3126 bdrv_refresh_perms(old_bs, NULL);
3127
3128 /*
3129 * When the parent requiring a non-default AioContext is removed, the
3130 * node moves back to the main AioContext
3131 */
3132 bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
3133 }
3134 }
3135
3136 /*
3137 * This function steals the reference to child_bs from the caller.
3138 * That reference is later dropped by bdrv_root_unref_child().
3139 *
3140 * On failure NULL is returned, errp is set and the reference to
3141 * child_bs is also dropped.
3142 *
3143 * The caller must hold the AioContext lock @child_bs, but not that of @ctx
3144 * (unless @child_bs is already in @ctx).
3145 */
3146 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
3147 const char *child_name,
3148 const BdrvChildClass *child_class,
3149 BdrvChildRole child_role,
3150 uint64_t perm, uint64_t shared_perm,
3151 void *opaque, Error **errp)
3152 {
3153 int ret;
3154 BdrvChild *child = NULL;
3155 Transaction *tran = tran_new();
3156
3157 GLOBAL_STATE_CODE();
3158
3159 ret = bdrv_attach_child_common(child_bs, child_name, child_class,
3160 child_role, perm, shared_perm, opaque,
3161 &child, tran, errp);
3162 if (ret < 0) {
3163 goto out;
3164 }
3165
3166 ret = bdrv_refresh_perms(child_bs, errp);
3167
3168 out:
3169 tran_finalize(tran, ret);
3170 /* child is unset on failure by bdrv_attach_child_common_abort() */
3171 assert((ret < 0) == !child);
3172
3173 bdrv_unref(child_bs);
3174 return child;
3175 }
3176
3177 /*
3178 * This function transfers the reference to child_bs from the caller
3179 * to parent_bs. That reference is later dropped by parent_bs on
3180 * bdrv_close() or if someone calls bdrv_unref_child().
3181 *
3182 * On failure NULL is returned, errp is set and the reference to
3183 * child_bs is also dropped.
3184 *
3185 * If @parent_bs and @child_bs are in different AioContexts, the caller must
3186 * hold the AioContext lock for @child_bs, but not for @parent_bs.
3187 */
3188 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
3189 BlockDriverState *child_bs,
3190 const char *child_name,
3191 const BdrvChildClass *child_class,
3192 BdrvChildRole child_role,
3193 Error **errp)
3194 {
3195 int ret;
3196 BdrvChild *child = NULL;
3197 Transaction *tran = tran_new();
3198
3199 GLOBAL_STATE_CODE();
3200
3201 ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class,
3202 child_role, &child, tran, errp);
3203 if (ret < 0) {
3204 goto out;
3205 }
3206
3207 ret = bdrv_refresh_perms(parent_bs, errp);
3208 if (ret < 0) {
3209 goto out;
3210 }
3211
3212 out:
3213 tran_finalize(tran, ret);
3214 /* child is unset on failure by bdrv_attach_child_common_abort() */
3215 assert((ret < 0) == !child);
3216
3217 bdrv_unref(child_bs);
3218
3219 return child;
3220 }
3221
3222 /* Callers must ensure that child->frozen is false. */
3223 void bdrv_root_unref_child(BdrvChild *child)
3224 {
3225 BlockDriverState *child_bs;
3226
3227 GLOBAL_STATE_CODE();
3228
3229 child_bs = child->bs;
3230 bdrv_detach_child(&child);
3231 bdrv_unref(child_bs);
3232 }
3233
/*
 * Undo record created by bdrv_set_inherits_from() when a transaction is
 * given.
 */
typedef struct BdrvSetInheritsFrom {
    BlockDriverState *bs;                   /* node that was modified */
    BlockDriverState *old_inherits_from;    /* its previous inherits_from */
} BdrvSetInheritsFrom;
3238
3239 static void bdrv_set_inherits_from_abort(void *opaque)
3240 {
3241 BdrvSetInheritsFrom *s = opaque;
3242
3243 s->bs->inherits_from = s->old_inherits_from;
3244 }
3245
3246 static TransactionActionDrv bdrv_set_inherits_from_drv = {
3247 .abort = bdrv_set_inherits_from_abort,
3248 .clean = g_free,
3249 };
3250
3251 /* @tran is allowed to be NULL. In this case no rollback is possible */
3252 static void bdrv_set_inherits_from(BlockDriverState *bs,
3253 BlockDriverState *new_inherits_from,
3254 Transaction *tran)
3255 {
3256 if (tran) {
3257 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
3258
3259 *s = (BdrvSetInheritsFrom) {
3260 .bs = bs,
3261 .old_inherits_from = bs->inherits_from,
3262 };
3263
3264 tran_add(tran, &bdrv_set_inherits_from_drv, s);
3265 }
3266
3267 bs->inherits_from = new_inherits_from;
3268 }
3269
3270 /**
3271 * Clear all inherits_from pointers from children and grandchildren of
3272 * @root that point to @root, where necessary.
3273 * @tran is allowed to be NULL. In this case no rollback is possible
3274 */
3275 static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
3276 Transaction *tran)
3277 {
3278 BdrvChild *c;
3279
3280 if (child->bs->inherits_from == root) {
3281 /*
3282 * Remove inherits_from only when the last reference between root and
3283 * child->bs goes away.
3284 */
3285 QLIST_FOREACH(c, &root->children, next) {
3286 if (c != child && c->bs == child->bs) {
3287 break;
3288 }
3289 }
3290 if (c == NULL) {
3291 bdrv_set_inherits_from(child->bs, NULL, tran);
3292 }
3293 }
3294
3295 QLIST_FOREACH(c, &child->bs->children, next) {
3296 bdrv_unset_inherits_from(root, c, tran);
3297 }
3298 }
3299
3300 /* Callers must ensure that child->frozen is false. */
3301 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
3302 {
3303 GLOBAL_STATE_CODE();
3304 if (child == NULL) {
3305 return;
3306 }
3307
3308 bdrv_unset_inherits_from(parent, child, NULL);
3309 bdrv_root_unref_child(child);
3310 }
3311
3312
3313 static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
3314 {
3315 BdrvChild *c;
3316 GLOBAL_STATE_CODE();
3317 QLIST_FOREACH(c, &bs->parents, next_parent) {
3318 if (c->klass->change_media) {
3319 c->klass->change_media(c, load);
3320 }
3321 }
3322 }
3323
3324 /* Return true if you can reach parent going through child->inherits_from
3325 * recursively. If parent or child are NULL, return false */
3326 static bool bdrv_inherits_from_recursive(BlockDriverState *child,
3327 BlockDriverState *parent)
3328 {
3329 while (child && child != parent) {
3330 child = child->inherits_from;
3331 }
3332
3333 return child != NULL;
3334 }
3335
3336 /*
3337 * Return the BdrvChildRole for @bs's backing child. bs->backing is
3338 * mostly used for COW backing children (role = COW), but also for
3339 * filtered children (role = FILTERED | PRIMARY).
3340 */
3341 static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
3342 {
3343 if (bs->drv && bs->drv->is_filter) {
3344 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3345 } else {
3346 return BDRV_CHILD_COW;
3347 }
3348 }
3349
3350 /*
3351 * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
3352 * callers which don't need their own reference any more must call bdrv_unref().
3353 *
3354 * Function doesn't update permissions, caller is responsible for this.
3355 */
3356 static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
3357 BlockDriverState *child_bs,
3358 bool is_backing,
3359 Transaction *tran, Error **errp)
3360 {
3361 int ret = 0;
3362 bool update_inherits_from =
3363 bdrv_inherits_from_recursive(child_bs, parent_bs);
3364 BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
3365 BdrvChildRole role;
3366
3367 GLOBAL_STATE_CODE();
3368
3369 if (!parent_bs->drv) {
3370 /*
3371 * Node without drv is an object without a class :/. TODO: finally fix
3372 * qcow2 driver to never clear bs->drv and implement format corruption
3373 * handling in other way.
3374 */
3375 error_setg(errp, "Node corrupted");
3376 return -EINVAL;
3377 }
3378
3379 if (child && child->frozen) {
3380 error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'",
3381 child->name, parent_bs->node_name, child->bs->node_name);
3382 return -EPERM;
3383 }
3384
3385 if (is_backing && !parent_bs->drv->is_filter &&
3386 !parent_bs->drv->supports_backing)
3387 {
3388 error_setg(errp, "Driver '%s' of node '%s' does not support backing "
3389 "files", parent_bs->drv->format_name, parent_bs->node_name);
3390 return -EINVAL;
3391 }
3392
3393 if (parent_bs->drv->is_filter) {
3394 role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3395 } else if (is_backing) {
3396 role = BDRV_CHILD_COW;
3397 } else {
3398 /*
3399 * We only can use same role as it is in existing child. We don't have
3400 * infrastructure to determine role of file child in generic way
3401 */
3402 if (!child) {
3403 error_setg(errp, "Cannot set file child to format node without "
3404 "file child");
3405 return -EINVAL;
3406 }
3407 role = child->role;
3408 }
3409
3410 if (child) {
3411 bdrv_unset_inherits_from(parent_bs, child, tran);
3412 bdrv_remove_file_or_backing_child(parent_bs, child, tran);
3413 }
3414
3415 if (!child_bs) {
3416 goto out;
3417 }
3418
3419 ret = bdrv_attach_child_noperm(parent_bs, child_bs,
3420 is_backing ? "backing" : "file",
3421 &child_of_bds, role,
3422 is_backing ? &parent_bs->backing :
3423 &parent_bs->file,
3424 tran, errp);
3425 if (ret < 0) {
3426 return ret;
3427 }
3428
3429
3430 /*
3431 * If inherits_from pointed recursively to bs then let's update it to
3432 * point directly to bs (else it will become NULL).
3433 */
3434 if (update_inherits_from) {
3435 bdrv_set_inherits_from(child_bs, parent_bs, tran);
3436 }
3437
3438 out:
3439 bdrv_refresh_limits(parent_bs, tran, NULL);
3440
3441 return 0;
3442 }
3443
3444 static int bdrv_set_backing_noperm(BlockDriverState *bs,
3445 BlockDriverState *backing_hd,
3446 Transaction *tran, Error **errp)
3447 {
3448 GLOBAL_STATE_CODE();
3449 return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
3450 }
3451
3452 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
3453 Error **errp)
3454 {
3455 int ret;
3456 Transaction *tran = tran_new();
3457
3458 GLOBAL_STATE_CODE();
3459 bdrv_drained_begin(bs);
3460
3461 ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
3462 if (ret < 0) {
3463 goto out;
3464 }
3465
3466 ret = bdrv_refresh_perms(bs, errp);
3467 out:
3468 tran_finalize(tran, ret);
3469
3470 bdrv_drained_end(bs);
3471
3472 return ret;
3473 }
3474
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * @parent_options may be NULL, which is treated as an empty set of options.
 *
 * Returns 0 on success, and also when there is nothing to do (bs->backing is
 * already set, or there is neither a backing file name nor any backing
 * option). Returns a negative errno value with @errp set on failure.
 *
 * TODO Can this be unified with bdrv_open_image()?
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
                           const char *bdref_key, Error **errp)
{
    char *backing_filename = NULL;
    char *bdref_key_dot;
    const char *reference = NULL;
    int ret = 0;
    bool implicit_backing = false;
    BlockDriverState *backing_hd;
    QDict *options;
    QDict *tmp_parent_options = NULL;
    Error *local_err = NULL;

    GLOBAL_STATE_CODE();

    /* A backing child is already attached: nothing to do, ret is still 0 */
    if (bs->backing != NULL) {
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (parent_options == NULL) {
        tmp_parent_options = qdict_new();
        parent_options = tmp_parent_options;
    }

    /* Cleared while we try to open; set again below if opening fails */
    bs->open_flags &= ~BDRV_O_NO_BACKING;

    /* Collect the "${bdref_key}." entries of @parent_options in @options */
    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
    g_free(bdref_key_dot);

    /*
     * Caution: while qdict_get_try_str() is fine, getting non-string
     * types would require more care.  When @parent_options come from
     * -blockdev or blockdev_add, its members are typed according to
     * the QAPI schema, but when they come from -drive, they're all
     * QString.
     */
    reference = qdict_get_try_str(parent_options, bdref_key);
    if (reference || qdict_haskey(options, "file.filename")) {
        /* keep backing_filename NULL */
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        /* No backing file recorded and no options given: not an error */
        qobject_unref(options);
        goto free_exit;
    } else {
        if (qdict_size(options) == 0) {
            /* If the user specifies options that do not modify the
             * backing file's behavior, we might still consider it the
             * implicit backing file.  But it's easier this way, and
             * just specifying some of the backing BDS's options is
             * only possible with -drive anyway (otherwise the QAPI
             * schema forces the user to specify everything). */
            implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
        }

        backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
        if (local_err) {
            ret = -EINVAL;
            error_propagate(errp, local_err);
            qobject_unref(options);
            goto free_exit;
        }
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        qobject_unref(options);
        goto free_exit;
    }

    /*
     * Use the backing format recorded in @bs as the driver, unless an
     * existing node is referenced or a driver was given explicitly.
     */
    if (!reference &&
        bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
        qdict_put_str(options, "driver", bs->backing_format);
    }

    /*
     * @options is not unreferenced here after this call, not even on
     * failure; every earlier exit path above drops it explicitly.
     */
    backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
                                   &child_of_bds, bdrv_backing_role(bs), errp);
    if (!backing_hd) {
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_prepend(errp, "Could not open backing file: ");
        ret = -EINVAL;
        goto free_exit;
    }

    if (implicit_backing) {
        /* Record the refreshed filename of the node that was opened */
        bdrv_refresh_filename(backing_hd);
        pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
                backing_hd->filename);
    }

    /* Hook up the backing file link; drop our reference, bs owns the
     * backing_hd reference now */
    ret = bdrv_set_backing_hd(bs, backing_hd, errp);
    bdrv_unref(backing_hd);
    if (ret < 0) {
        goto free_exit;
    }

    /* The BlockdevRef has been consumed; remove it from the parent options */
    qdict_del(parent_options, bdref_key);

free_exit:
    g_free(backing_filename);
    qobject_unref(tmp_parent_options);
    return ret;
}
3591
3592 static BlockDriverState *
3593 bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
3594 BlockDriverState *parent, const BdrvChildClass *child_class,
3595 BdrvChildRole child_role, bool allow_none, Error **errp)
3596 {
3597 BlockDriverState *bs = NULL;
3598 QDict *image_options;
3599 char *bdref_key_dot;
3600 const char *reference;
3601
3602 assert(child_class != NULL);
3603
3604 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3605 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
3606 g_free(bdref_key_dot);
3607
3608 /*
3609 * Caution: while qdict_get_try_str() is fine, getting non-string
3610 * types would require more care. When @options come from
3611 * -blockdev or blockdev_add, its members are typed according to
3612 * the QAPI schema, but when they come from -drive, they're all
3613 * QString.
3614 */
3615 reference = qdict_get_try_str(options, bdref_key);
3616 if (!filename && !reference && !qdict_size(image_options)) {
3617 if (!allow_none) {
3618 error_setg(errp, "A block device must be specified for \"%s\"",
3619 bdref_key);
3620 }
3621 qobject_unref(image_options);
3622 goto done;
3623 }
3624
3625 bs = bdrv_open_inherit(filename, reference, image_options, 0,
3626 parent, child_class, child_role, errp);
3627 if (!bs) {
3628 goto done;
3629 }
3630
3631 done:
3632 qdict_del(options, bdref_key);
3633 return bs;
3634 }
3635
3636 /*
3637 * Opens a disk image whose options are given as BlockdevRef in another block
3638 * device's options.
3639 *
3640 * If allow_none is true, no image will be opened if filename is false and no
3641 * BlockdevRef is given. NULL will be returned, but errp remains unset.
3642 *
3643 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
3644 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3645 * itself, all options starting with "${bdref_key}." are considered part of the
3646 * BlockdevRef.
3647 *
3648 * The BlockdevRef will be removed from the options QDict.
3649 */
3650 BdrvChild *bdrv_open_child(const char *filename,
3651 QDict *options, const char *bdref_key,
3652 BlockDriverState *parent,
3653 const BdrvChildClass *child_class,
3654 BdrvChildRole child_role,
3655 bool allow_none, Error **errp)
3656 {
3657 BlockDriverState *bs;
3658
3659 GLOBAL_STATE_CODE();
3660
3661 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
3662 child_role, allow_none, errp);
3663 if (bs == NULL) {
3664 return NULL;
3665 }
3666
3667 return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
3668 errp);
3669 }
3670
3671 /*
3672 * TODO Future callers may need to specify parent/child_class in order for
3673 * option inheritance to work. Existing callers use it for the root node.
3674 */
3675 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
3676 {
3677 BlockDriverState *bs = NULL;
3678 QObject *obj = NULL;
3679 QDict *qdict = NULL;
3680 const char *reference = NULL;
3681 Visitor *v = NULL;
3682
3683 GLOBAL_STATE_CODE();
3684
3685 if (ref->type == QTYPE_QSTRING) {
3686 reference = ref->u.reference;
3687 } else {
3688 BlockdevOptions *options = &ref->u.definition;
3689 assert(ref->type == QTYPE_QDICT);
3690
3691 v = qobject_output_visitor_new(&obj);
3692 visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
3693 visit_complete(v, &obj);
3694
3695 qdict = qobject_to(QDict, obj);
3696 qdict_flatten(qdict);
3697
3698 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
3699 * compatibility with other callers) rather than what we want as the
3700 * real defaults. Apply the defaults here instead. */
3701 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
3702 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
3703 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
3704 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off");
3705
3706 }
3707
3708 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
3709 obj = NULL;
3710 qobject_unref(obj);
3711 visit_free(v);
3712 return bs;
3713 }
3714
3715 static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
3716 int flags,
3717 QDict *snapshot_options,
3718 Error **errp)
3719 {
3720 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
3721 char *tmp_filename = g_malloc0(PATH_MAX + 1);
3722 int64_t total_size;
3723 QemuOpts *opts = NULL;
3724 BlockDriverState *bs_snapshot = NULL;
3725 int ret;
3726
3727 GLOBAL_STATE_CODE();
3728
3729 /* if snapshot, we create a temporary backing file and open it
3730 instead of opening 'filename' directly */
3731
3732 /* Get the required size from the image */
3733 total_size = bdrv_getlength(bs);
3734 if (total_size < 0) {
3735 error_setg_errno(errp, -total_size, "Could not get image size");
3736 goto out;
3737 }
3738
3739 /* Create the temporary image */
3740 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
3741 if (ret < 0) {
3742 error_setg_errno(errp, -ret, "Could not get temporary filename");
3743 goto out;
3744 }
3745
3746 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
3747 &error_abort);
3748 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
3749 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
3750 qemu_opts_del(opts);
3751 if (ret < 0) {
3752 error_prepend(errp, "Could not create temporary overlay '%s': ",
3753 tmp_filename);
3754 goto out;
3755 }
3756
3757 /* Prepare options QDict for the temporary file */
3758 qdict_put_str(snapshot_options, "file.driver", "file");
3759 qdict_put_str(snapshot_options, "file.filename", tmp_filename);
3760 qdict_put_str(snapshot_options, "driver", "qcow2");
3761
3762 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
3763 snapshot_options = NULL;
3764 if (!bs_snapshot) {
3765 goto out;
3766 }
3767
3768 ret = bdrv_append(bs_snapshot, bs, errp);
3769 if (ret < 0) {
3770 bs_snapshot = NULL;
3771 goto out;
3772 }
3773
3774 out:
3775 qobject_unref(snapshot_options);
3776 g_free(tmp_filename);
3777 return bs_snapshot;
3778 }
3779
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given;
 * the existing node is returned with an additional reference taken. Otherwise
 * a new BDS is created with bdrv_new() and opened.
 *
 * @parent and @child_class must either both be set or both be NULL, and
 * @flags must be 0 when @child_class is set. With a @child_class, flags and
 * options are inherited from @parent through child_class->inherit_options().
 *
 * Returns the opened node (for snapshot=on, the temporary overlay on top of
 * it), or NULL with @errp set on failure.
 */
static BlockDriverState *bdrv_open_inherit(const char *filename,
                                           const char *reference,
                                           QDict *options, int flags,
                                           BlockDriverState *parent,
                                           const BdrvChildClass *child_class,
                                           BdrvChildRole child_role,
                                           Error **errp)
{
    int ret;
    BlockBackend *file = NULL;
    BlockDriverState *bs;
    BlockDriver *drv = NULL;
    BdrvChild *child;
    const char *drvname;
    const char *backing;
    Error *local_err = NULL;
    QDict *snapshot_options = NULL;
    int snapshot_flags = 0;

    assert(!child_class || !flags);
    assert(!child_class == !parent);
    GLOBAL_STATE_CODE();

    if (reference) {
        /*
         * The options are dropped right away; remember whether there were
         * any so that the check below can still reject them.
         */
        bool options_non_empty = options ? qdict_size(options) : false;
        qobject_unref(options);

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return NULL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return NULL;
        }

        /* The caller gets its own reference to the existing node */
        bdrv_ref(bs);
        return bs;
    }

    bs = bdrv_new();

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    /* json: syntax counts as explicit options, as if in the QDict */
    parse_json_protocol(options, &filename, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Snapshot of the options taken before anything is inherited */
    bs->explicit_options = qdict_clone_shallow(options);

    if (child_class) {
        bool parent_is_format;

        if (parent->drv) {
            parent_is_format = parent->drv->is_format;
        } else {
            /*
             * parent->drv is not set yet because this node is opened for
             * (potential) format probing.  That means that @parent is going
             * to be a format node.
             */
            parent_is_format = true;
        }

        bs->inherits_from = parent;
        child_class->inherit_options(child_role, parent_is_format,
                                     &flags, options,
                                     parent->open_flags, parent->options);
    }

    ret = bdrv_fill_options(&options, filename, &flags, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /*
     * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
     * Caution: getting a boolean member of @options requires care.
     * When @options come from -blockdev or blockdev_add, members are
     * typed according to the QAPI schema, but when they come from
     * -drive, they're all QString.
     */
    if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
        !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
        flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
    } else {
        flags &= ~BDRV_O_RDWR;
    }

    if (flags & BDRV_O_SNAPSHOT) {
        snapshot_options = qdict_new();
        bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
                                   flags, options);
        /* Let bdrv_inherited_options() below override "read-only" */
        qdict_del(options, BDRV_OPT_READ_ONLY);
        bdrv_inherited_options(BDRV_CHILD_COW, true,
                               &flags, options, flags, options);
    }

    /*
     * bs->options keeps the full set of options. The local @options becomes
     * a shallow clone that is consumed step by step below; whatever is left
     * in it at the end is reported as an unknown option.
     */
    bs->open_flags = flags;
    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Find the right image format driver */
    /* See cautionary note on accessing @options above */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));

    /* See cautionary note on accessing @options above */
    backing = qdict_get_try_str(options, "backing");
    if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
        (backing && *backing == '\0'))
    {
        /* "backing" is null or (deprecated) "": open without backing file */
        if (backing) {
            warn_report("Use of \"backing\": \"\" is deprecated; "
                        "use \"backing\": null instead");
        }
        flags |= BDRV_O_NO_BACKING;
        qdict_del(bs->explicit_options, "backing");
        qdict_del(bs->options, "backing");
        qdict_del(options, "backing");
    }

    /* Open image file without format layer. This BlockBackend is only used for
     * probing, the block drivers will do their own bdrv_open_child() for the
     * same BDS, which is why we put the node name back into options. */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        BlockDriverState *file_bs;

        /* allow_none is true: a missing "file" is not an error here */
        file_bs = bdrv_open_child_bs(filename, options, "file", bs,
                                     &child_of_bds, BDRV_CHILD_IMAGE,
                                     true, &local_err);
        if (local_err) {
            goto fail;
        }
        if (file_bs != NULL) {
            /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
             * looking at the header to guess the image format. This works even
             * in cases where a guest would not see a consistent state. */
            file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
            blk_insert_bs(file, file_bs, &local_err);
            /*
             * Drop our own reference. NOTE(review): file_bs is still used
             * below, so @file presumably holds a reference after a
             * successful blk_insert_bs() - confirm.
             */
            bdrv_unref(file_bs);
            if (local_err) {
                goto fail;
            }

            qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
        }
    }

    /* Image format probing */
    bs->probed = !drv;
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
        /*
         * This option update would logically belong in bdrv_fill_options(),
         * but we first need to open bs->file for the probing to work, while
         * opening bs->file already requires the (mostly) final set of options
         * so that cache mode etc. can be inherited.
         *
         * Adding the driver later is somewhat ugly, but it's not an option
         * that would ever be inherited, so it's correct. We just need to make
         * sure to update both bs->options (which has the full effective
         * options for bs) and options (which has file.* already removed).
         */
        qdict_put_str(bs->options, "driver", drv->format_name);
        qdict_put_str(options, "driver", drv->format_name);
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        goto fail;
    }

    /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
    assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
    /* file must be NULL if a protocol BDS is about to be created
     * (the inverse results in an error message from bdrv_open_common()) */
    assert(!(flags & BDRV_O_PROTOCOL) || !file);

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* The probing BlockBackend is no longer needed */
    if (file) {
        blk_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    /* Remove all children options and references
     * from bs->options and bs->explicit_options */
    QLIST_FOREACH(child, &bs->children, next) {
        char *child_key_dot;
        child_key_dot = g_strdup_printf("%s.", child->name);
        qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
        qdict_extract_subqdict(bs->options, NULL, child_key_dot);
        qdict_del(bs->explicit_options, child->name);
        qdict_del(bs->options, child->name);
        g_free(child_key_dot);
    }

    /* Check if any unknown options were used */
    if (qdict_size(options) != 0) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp,
                       "Block format '%s' does not support the option '%s'",
                       drv->format_name, entry->key);
        }

        goto close_and_fail;
    }

    bdrv_parent_cb_change_media(bs, true);

    qobject_unref(options);
    options = NULL;

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        BlockDriverState *snapshot_bs;
        snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
                                                snapshot_options, &local_err);
        /* bdrv_append_temp_snapshot() consumed the reference */
        snapshot_options = NULL;
        if (local_err) {
            goto close_and_fail;
        }
        /* We are not going to return bs but the overlay on top of it
         * (snapshot_bs); thus, we have to drop the strong reference to bs
         * (which we obtained by calling bdrv_new()). bs will not be deleted,
         * though, because the overlay still has a reference to it. */
        bdrv_unref(bs);
        bs = snapshot_bs;
    }

    return bs;

    /*
     * Error exit for failures up to and including bdrv_open_common(): the
     * option dicts attached to bs are released by hand before bs is dropped.
     */
fail:
    blk_unref(file);
    qobject_unref(snapshot_options);
    qobject_unref(bs->explicit_options);
    qobject_unref(bs->options);
    qobject_unref(options);
    bs->options = NULL;
    bs->explicit_options = NULL;
    bdrv_unref(bs);
    error_propagate(errp, local_err);
    return NULL;

    /*
     * Error exit once bdrv_open_common() has succeeded: only the local
     * dicts are released here. NOTE(review): bs->options and
     * bs->explicit_options are presumably released when bs is torn down
     * by bdrv_unref() - confirm.
     */
close_and_fail:
    bdrv_unref(bs);
    qobject_unref(snapshot_options);
    qobject_unref(options);
    error_propagate(errp, local_err);
    return NULL;
}
4081
4082 BlockDriverState *bdrv_open(const char *filename, const char *reference,
4083 QDict *options, int flags, Error **errp)
4084 {
4085 GLOBAL_STATE_CODE();
4086
4087 return bdrv_open_inherit(filename, reference, options, flags, NULL,
4088 NULL, 0, errp);
4089 }
4090
/*
 * Return true if the NULL-terminated @list contains @str.
 * A NULL @str or a NULL @list never matches.
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    const char *const *entry;

    if (!str || !list) {
        return false;
    }

    for (entry = list; *entry != NULL; entry++) {
        if (strcmp(str, *entry) == 0) {
            return true;
        }
    }

    return false;
}
4104