Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
[qemu.git] / block / qcow2.h
1 /*
2 * Block driver for the QCOW version 2 format
3 *
4 * Copyright (c) 2004-2006 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #ifndef BLOCK_QCOW2_H
26 #define BLOCK_QCOW2_H
27
28 #include "qemu/aes.h"
29 #include "block/coroutine.h"
30
31 //#define DEBUG_ALLOC
32 //#define DEBUG_ALLOC2
33 //#define DEBUG_EXT
34
/* Magic number: the characters 'Q', 'F', 'I' then 0xfb, most significant first */
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)

/* Encryption method identifiers */
#define QCOW_CRYPT_NONE 0
#define QCOW_CRYPT_AES  1

#define QCOW_MAX_CRYPT_CLUSTERS 32

/* Bit 63: the refcount of the referenced cluster is exactly one */
#define QCOW_OFLAG_COPIED     (1ULL << 63)
/* Bit 62: the cluster is compressed (never set together with COPIED) */
#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
/* Bit 0: the cluster reads as all zeros */
#define QCOW_OFLAG_ZERO       (1ULL << 0)

#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes (1 << REFCOUNT_SHIFT) */

/* Allowed range for cluster_bits: 2^9 = 512 up to 2^21 = 2097152 */
#define MIN_CLUSTER_BITS 9
#define MAX_CLUSTER_BITS 21

#define L2_CACHE_SIZE 16

/* Must be at least 4 to cover all cases of refcount table growth */
#define REFCOUNT_CACHE_SIZE 4

#define DEFAULT_CLUSTER_SIZE 65536 /* 64 KiB */
60
61
/* Option name strings (QCOW2_OPT_*) */
#define QCOW2_OPT_LAZY_REFCOUNTS         "lazy-refcounts"
#define QCOW2_OPT_DISCARD_REQUEST        "pass-discard-request"
#define QCOW2_OPT_DISCARD_SNAPSHOT       "pass-discard-snapshot"
#define QCOW2_OPT_DISCARD_OTHER          "pass-discard-other"

/* Overlap-check option and its per-structure sub-options */
#define QCOW2_OPT_OVERLAP                "overlap-check"
#define QCOW2_OPT_OVERLAP_MAIN_HEADER    "overlap-check.main-header"
#define QCOW2_OPT_OVERLAP_ACTIVE_L1      "overlap-check.active-l1"
#define QCOW2_OPT_OVERLAP_ACTIVE_L2      "overlap-check.active-l2"
#define QCOW2_OPT_OVERLAP_REFCOUNT_TABLE "overlap-check.refcount-table"
#define QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK "overlap-check.refcount-block"
#define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table"
#define QCOW2_OPT_OVERLAP_INACTIVE_L1    "overlap-check.inactive-l1"
#define QCOW2_OPT_OVERLAP_INACTIVE_L2    "overlap-check.inactive-l2"
75
76 typedef struct QCowHeader {
77 uint32_t magic;
78 uint32_t version;
79 uint64_t backing_file_offset;
80 uint32_t backing_file_size;
81 uint32_t cluster_bits;
82 uint64_t size; /* in bytes */
83 uint32_t crypt_method;
84 uint32_t l1_size; /* XXX: save number of clusters instead ? */
85 uint64_t l1_table_offset;
86 uint64_t refcount_table_offset;
87 uint32_t refcount_table_clusters;
88 uint32_t nb_snapshots;
89 uint64_t snapshots_offset;
90
91 /* The following fields are only valid for version >= 3 */
92 uint64_t incompatible_features;
93 uint64_t compatible_features;
94 uint64_t autoclear_features;
95
96 uint32_t refcount_order;
97 uint32_t header_length;
98 } QEMU_PACKED QCowHeader;
99
/* Description of one snapshot: its L1 table plus identifying metadata. */
typedef struct QCowSnapshot {
    /* L1 table of the snapshot */
    uint64_t l1_table_offset;
    uint32_t l1_size;

    /* Identification strings */
    char *id_str;
    char *name;

    uint64_t disk_size;
    uint64_t vm_state_size;

    /* Timestamps */
    uint32_t date_sec;
    uint32_t date_nsec;
    uint64_t vm_clock_nsec;
} QCowSnapshot;
111
/* Opaque cache type: only declared here, so it is used through pointers. */
typedef struct Qcow2Cache Qcow2Cache;
114
115 typedef struct Qcow2UnknownHeaderExtension {
116 uint32_t magic;
117 uint32_t len;
118 QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
119 uint8_t data[];
120 } Qcow2UnknownHeaderExtension;
121
/* Feature type identifiers, one per feature bitmap kind */
enum {
    QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
    QCOW2_FEAT_TYPE_COMPATIBLE   = 1,
    QCOW2_FEAT_TYPE_AUTOCLEAR    = 2,
};
127
/* Incompatible feature bits: bit numbers first, then the matching masks */
enum {
    QCOW2_INCOMPAT_DIRTY_BITNR   = 0,
    QCOW2_INCOMPAT_CORRUPT_BITNR = 1,

    QCOW2_INCOMPAT_DIRTY         = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
    QCOW2_INCOMPAT_CORRUPT       = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,

    /* Union of all incompatible feature bits defined above */
    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY
                                 | QCOW2_INCOMPAT_CORRUPT,
};
138
/* Compatible feature bits: bit number, matching mask, and the union mask */
enum {
    QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
    QCOW2_COMPAT_LAZY_REFCOUNTS       = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,

    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
};
146
/*
 * Discard categories. QCOW2_DISCARD_MAX is the number of values and is used
 * as the length of BDRVQcowState.discard_passthrough[].
 */
enum qcow2_discard_type {
    QCOW2_DISCARD_NEVER = 0,
    QCOW2_DISCARD_ALWAYS,
    QCOW2_DISCARD_REQUEST,
    QCOW2_DISCARD_SNAPSHOT,
    QCOW2_DISCARD_OTHER,
    QCOW2_DISCARD_MAX
};
155
156 typedef struct Qcow2Feature {
157 uint8_t type;
158 uint8_t bit;
159 char name[46];
160 } QEMU_PACKED Qcow2Feature;
161
162 typedef struct Qcow2DiscardRegion {
163 BlockDriverState *bs;
164 uint64_t offset;
165 uint64_t bytes;
166 QTAILQ_ENTRY(Qcow2DiscardRegion) next;
167 } Qcow2DiscardRegion;
168
169 typedef struct BDRVQcowState {
170 int cluster_bits;
171 int cluster_size;
172 int cluster_sectors;
173 int l2_bits;
174 int l2_size;
175 int l1_size;
176 int l1_vm_state_index;
177 int csize_shift;
178 int csize_mask;
179 uint64_t cluster_offset_mask;
180 uint64_t l1_table_offset;
181 uint64_t *l1_table;
182
183 Qcow2Cache* l2_table_cache;
184 Qcow2Cache* refcount_block_cache;
185
186 uint8_t *cluster_cache;
187 uint8_t *cluster_data;
188 uint64_t cluster_cache_offset;
189 QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
190
191 uint64_t *refcount_table;
192 uint64_t refcount_table_offset;
193 uint32_t refcount_table_size;
194 int64_t free_cluster_index;
195 int64_t free_byte_offset;
196
197 CoMutex lock;
198
199 uint32_t crypt_method; /* current crypt method, 0 if no key yet */
200 uint32_t crypt_method_header;
201 AES_KEY aes_encrypt_key;
202 AES_KEY aes_decrypt_key;
203 uint64_t snapshots_offset;
204 int snapshots_size;
205 int nb_snapshots;
206 QCowSnapshot *snapshots;
207
208 int flags;
209 int qcow_version;
210 bool use_lazy_refcounts;
211 int refcount_order;
212
213 bool discard_passthrough[QCOW2_DISCARD_MAX];
214
215 int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
216
217 uint64_t incompatible_features;
218 uint64_t compatible_features;
219 uint64_t autoclear_features;
220
221 size_t unknown_header_fields_size;
222 void* unknown_header_fields;
223 QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
224 QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
225 bool cache_discards;
226 } BDRVQcowState;
227
/* XXX: use std qcow open function ? */
/* Cluster geometry plus refcount buffers and the offsets of the new tables. */
typedef struct QCowCreateState {
    int cluster_size;
    int cluster_bits;

    uint16_t *refcount_block;
    uint64_t *refcount_table;

    int64_t l1_table_offset;
    int64_t refcount_table_offset;
    int64_t refcount_block_offset;
} QCowCreateState;
238
/* Forward declaration only; the structure is not defined in this header. */
struct QCowAIOCB;
240
/* A copy-on-write region, given as a byte offset and a length in sectors. */
typedef struct Qcow2COWRegion {
    /**
     * Byte offset of the COW region, counted from the start of the first
     * cluster touched by the request.
     */
    uint64_t offset;

    /** Number of sectors to copy */
    int nb_sectors;
} Qcow2COWRegion;
251
252 /**
253 * Describes an in-flight (part of a) write request that writes to clusters
254 * that are not referenced in their L2 table yet.
255 */
256 typedef struct QCowL2Meta
257 {
258 /** Guest offset of the first newly allocated cluster */
259 uint64_t offset;
260
261 /** Host offset of the first newly allocated cluster */
262 uint64_t alloc_offset;
263
264 /**
265 * Number of sectors from the start of the first allocated cluster to
266 * the end of the (possibly shortened) request
267 */
268 int nb_available;
269
270 /** Number of newly allocated clusters */
271 int nb_clusters;
272
273 /**
274 * Requests that overlap with this allocation and wait to be restarted
275 * when the allocating request has completed.
276 */
277 CoQueue dependent_requests;
278
279 /**
280 * The COW Region between the start of the first allocated cluster and the
281 * area the guest actually writes to.
282 */
283 Qcow2COWRegion cow_start;
284
285 /**
286 * The COW Region between the area the guest actually writes to and the
287 * end of the last allocated cluster.
288 */
289 Qcow2COWRegion cow_end;
290
291 /** Pointer to next L2Meta of the same write request */
292 struct QCowL2Meta *next;
293
294 QLIST_ENTRY(QCowL2Meta) next_in_flight;
295 } QCowL2Meta;
296
/* Cluster types, as returned by qcow2_get_cluster_type() */
enum {
    QCOW2_CLUSTER_UNALLOCATED = 0,
    QCOW2_CLUSTER_NORMAL      = 1,
    QCOW2_CLUSTER_COMPRESSED  = 2,
    QCOW2_CLUSTER_ZERO        = 3
};
303
/*
 * Metadata overlap check selectors: a bit number per metadata structure,
 * followed by the single-bit mask derived from each bit number.
 */
typedef enum QCow2MetadataOverlap {
    /* Bit numbers */
    QCOW2_OL_MAIN_HEADER_BITNR    = 0,
    QCOW2_OL_ACTIVE_L1_BITNR      = 1,
    QCOW2_OL_ACTIVE_L2_BITNR      = 2,
    QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
    QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
    QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
    QCOW2_OL_INACTIVE_L1_BITNR    = 6,
    QCOW2_OL_INACTIVE_L2_BITNR    = 7,

    /* Number of bit numbers defined above */
    QCOW2_OL_MAX_BITNR            = 8,

    /* Masks */
    QCOW2_OL_NONE           = 0,
    QCOW2_OL_MAIN_HEADER    = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
    QCOW2_OL_ACTIVE_L1      = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
    QCOW2_OL_ACTIVE_L2      = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
    QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
    QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
    QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
    QCOW2_OL_INACTIVE_L1    = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
    /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
     * reads. */
    QCOW2_OL_INACTIVE_L2    = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
} QCow2MetadataOverlap;
328
/* Perform all overlap checks which can be done in constant time */
#define QCOW2_OL_CONSTANT                                                   \
    (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE |  \
     QCOW2_OL_SNAPSHOT_TABLE)

/* Perform all overlap checks which don't require disk access */
#define QCOW2_OL_CACHED                                                     \
    (QCOW2_OL_CONSTANT | QCOW2_OL_ACTIVE_L2 | QCOW2_OL_REFCOUNT_BLOCK |     \
     QCOW2_OL_INACTIVE_L1)

/* Perform all overlap checks */
#define QCOW2_OL_ALL                                                        \
    (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
342
/* Offset bits of L1/L2 entries: the top 8 and the low 9 bits are excluded */
#define L1E_OFFSET_MASK                 0x00fffffffffffe00ULL
#define L2E_OFFSET_MASK                 0x00fffffffffffe00ULL
/* Everything below the two topmost bits (bits 62 and 63) */
#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL

/* Offset bits of refcount table entries: only the low 9 bits are excluded */
#define REFT_OFFSET_MASK                0xfffffffffffffe00ULL
348
349 static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
350 {
351 return offset & ~(s->cluster_size - 1);
352 }
353
354 static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
355 {
356 return offset & (s->cluster_size - 1);
357 }
358
359 static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
360 {
361 return (size + (s->cluster_size - 1)) >> s->cluster_bits;
362 }
363
364 static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
365 {
366 int shift = s->cluster_bits + s->l2_bits;
367 return (size + (1ULL << shift) - 1) >> shift;
368 }
369
370 static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
371 {
372 return (offset >> s->cluster_bits) & (s->l2_size - 1);
373 }
374
/*
 * Round @offset up to the next multiple of @n. The bit-mask arithmetic only
 * yields a multiple of @n when @n is a power of two.
 */
static inline int64_t align_offset(int64_t offset, int n)
{
    return (offset + n - 1) & ~(n - 1);
}
380
381 static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
382 {
383 return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
384 }
385
386 static inline int qcow2_get_cluster_type(uint64_t l2_entry)
387 {
388 if (l2_entry & QCOW_OFLAG_COMPRESSED) {
389 return QCOW2_CLUSTER_COMPRESSED;
390 } else if (l2_entry & QCOW_OFLAG_ZERO) {
391 return QCOW2_CLUSTER_ZERO;
392 } else if (!(l2_entry & L2E_OFFSET_MASK)) {
393 return QCOW2_CLUSTER_UNALLOCATED;
394 } else {
395 return QCOW2_CLUSTER_NORMAL;
396 }
397 }
398
399 /* Check whether refcounts are eager or lazy */
400 static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
401 {
402 return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
403 }
404
405 static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
406 {
407 return m->offset + m->cow_start.offset;
408 }
409
410 static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
411 {
412 return m->offset + m->cow_end.offset
413 + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
414 }
415
416 // FIXME Need qcow2_ prefix to global functions
417
418 /* qcow2.c functions */
419 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
420 int64_t sector_num, int nb_sectors);
421
422 int qcow2_mark_dirty(BlockDriverState *bs);
423 int qcow2_mark_corrupt(BlockDriverState *bs);
424 int qcow2_mark_consistent(BlockDriverState *bs);
425 int qcow2_update_header(BlockDriverState *bs);
426
427 /* qcow2-refcount.c functions */
428 int qcow2_refcount_init(BlockDriverState *bs);
429 void qcow2_refcount_close(BlockDriverState *bs);
430
431 int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
432 int addend, enum qcow2_discard_type type);
433
434 int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
435 int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
436 int nb_clusters);
437 int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
438 void qcow2_free_clusters(BlockDriverState *bs,
439 int64_t offset, int64_t size,
440 enum qcow2_discard_type type);
441 void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
442 int nb_clusters, enum qcow2_discard_type type);
443
444 int qcow2_update_snapshot_refcount(BlockDriverState *bs,
445 int64_t l1_table_offset, int l1_size, int addend);
446
447 int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
448 BdrvCheckMode fix);
449
450 void qcow2_process_discards(BlockDriverState *bs, int ret);
451
452 int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
453 int64_t size);
454 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
455 int64_t size);
456
457 /* qcow2-cluster.c functions */
458 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
459 bool exact_size);
460 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
461 void qcow2_l2_cache_reset(BlockDriverState *bs);
462 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
463 void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
464 uint8_t *out_buf, const uint8_t *in_buf,
465 int nb_sectors, int enc,
466 const AES_KEY *key);
467
468 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
469 int *num, uint64_t *cluster_offset);
470 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
471 int *num, uint64_t *host_offset, QCowL2Meta **m);
472 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
473 uint64_t offset,
474 int compressed_size);
475
476 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
477 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
478 int nb_sectors, enum qcow2_discard_type type);
479 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
480
481 int qcow2_expand_zero_clusters(BlockDriverState *bs);
482
483 /* qcow2-snapshot.c functions */
484 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
485 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
486 int qcow2_snapshot_delete(BlockDriverState *bs,
487 const char *snapshot_id,
488 const char *name,
489 Error **errp);
490 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
491 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
492 const char *snapshot_id,
493 const char *name,
494 Error **errp);
495
496 void qcow2_free_snapshots(BlockDriverState *bs);
497 int qcow2_read_snapshots(BlockDriverState *bs);
498
499 /* qcow2-cache.c functions */
500 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
501 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
502
503 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
504 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
505 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
506 Qcow2Cache *dependency);
507 void qcow2_cache_depends_on_flush(Qcow2Cache *c);
508
509 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
510
511 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
512 void **table);
513 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
514 void **table);
515 int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
516
517 #endif