[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC V8 02/24] qcow2: Add deduplication structures and fiel
From: |
Benoît Canet |
Subject: |
[Qemu-devel] [RFC V8 02/24] qcow2: Add deduplication structures and fields. |
Date: |
Thu, 20 Jun 2013 16:26:10 +0200 |
Signed-off-by: Benoit Canet <address@hidden>
---
block/qcow2.h | 203 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 201 insertions(+), 2 deletions(-)
diff --git a/block/qcow2.h b/block/qcow2.h
index 9421843..953edfe 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -57,7 +57,182 @@
#define REFCOUNT_CACHE_SIZE 4
#define DEFAULT_CLUSTER_SIZE 65536
-
+#define DEFAULT_DEDUP_CLUSTER_SIZE 4096
+
+#define HASH_LENGTH 32
+
+/* indicate that this cluster hash has been deleted from the key value store */
+#define QCOW_DEDUP_DELETED (1LL << 61)
+/* indicate that the hash structure is empty and miss offset */
+#define QCOW_DEDUP_FLAG_EMPTY (1LL << 62)
+
+#define SSD_ERASE_BLOCK_SIZE (512 * 1024) /* match SSD erase block size */
+#define JOURNAL_CLUSTER_SIZE 4096 /* used to read entries */
+#define HASH_STORE_CLUSTER_SIZE 4096
+
+#define QCOW_LOG_END_SIZE 2 /* size of a end block journal entry */
+#define QCOW_LOG_STORE_ENTRY_USED (1LL << 60) /* mark used entry in table */
+#define QCOW_LOG_STORE_BUCKET_SIZE 4 /* size of a cuckoo hash bucket */
+#define QCOW_LOG_STORE_MAX_KICKS 128 /* max numbers of cuckoo hash kicks */
+#define QCOW_LOG_STORE_JOURNAL_RATIO 2 /* the ratio to compute the extra
+ * room the journal will take based
+ * on the log store size
+ */
+#define QCOW2_NB_INCARNATION_GOAL 128 /* targeted number of incarnation */
+
+#define QCOW_DEDUP_DIRTY 1 /* dirty flag in the qcow2 header extension */
+
+typedef enum {
+ QCOW_LOG_NONE = 0xFF, /* on SSD erased clusters will mark none */
+ QCOW_LOG_END = 1, /* end a block and point to the next */
+ QCOW_LOG_HASH = 2, /* used to journalize a QCowHashInfo */
+} QCowLogEntryType;
+
+typedef enum {
+ QCOW_HASH_SHA256 = 0,
+ QCOW_HASH_SHA3 = 1,
+ QCOW_HASH_SKEIN = 2,
+} QCowHashAlgo;
+
+typedef struct {
+ uint8_t data[HASH_LENGTH]; /* 32 bytes hash of a given cluster */
+} __attribute__((packed)) QCowHash;
+
+/* deduplication info */
+typedef struct {
+ QCowHash hash;
+ uint64_t physical_sect; /* where the cluster is stored on disk */
+ uint64_t first_logical_sect; /* logical sector of the first occurrence of
+ * this cluster
+ */
+} __attribute__((packed)) QCowHashInfo;
+
+/* Used to keep a single precomputed hash between the calls of the dedup
+ * function
+ */
+typedef struct {
+ QCowHashInfo hash_info;
+ bool reuse; /* The main deduplication function can set this field to
+ * true before exiting to avoid computing the same hash
+ * twice. It's a speed optimization.
+ */
+} QcowPersistentHash;
+
+/* Undedupable hashes that must be written later to disk */
+typedef struct QCowHashElement {
+ QCowHashInfo hash_info;
+ QTAILQ_ENTRY(QCowHashElement) next;
+} QCowHashElement;
+
+typedef struct {
+ QcowPersistentHash phash; /* contains a hash persisting between calls of
+ * qcow2_dedup()
+ */
+ QTAILQ_HEAD(, QCowHashElement) undedupables;
+ uint64_t nb_clusters_processed;
+ uint64_t nb_undedupable_sectors;
+} QCowDedupState;
+
+/* The code must take care that the maximum size field of a QCowJournalEntry
+ * will be no more than 254 bytes.
+ * It's required to save the 2 bytes of room for QCOW_LOG_END entries
+ * in every cases
+ */
+typedef union {
+ QCowHashInfo hash_info;
+ uint8_t padding[254]; /* note the extra two bytes of padding to avoid
+ * read overflow.
+ */
+} QCowJournalEntryUnion;
+
+typedef struct {
+ uint8_t size; /* maximum size of a journal entry is 254 bytes */
+ uint8_t type; /* contains a QCowLogEntryType for future usage */
+ QCowJournalEntryUnion u;
+} __attribute__((packed)) QCowJournalEntry;
+
+typedef struct {
+ uint64_t sector; /* the journal physical on disk sector */
+ uint64_t size; /* the size of the journal in bytes */
+ uint64_t index; /* index of next buf cluster to write */
+ uint8_t *write_buf; /* used to buffer written data */
+ uint64_t offset_in_buf; /* the offset in the write buffer */
+ bool flushed; /* true if the buffer reached disk*/
+ uint8_t *read_cache; /* used to cache read data */
+ int64_t read_index; /* index the cached read cluster */
+ bool started; /* has the journal been resumed */
+} QCowJournal;
+
+typedef struct {
+ QCowJournal journal; /* the journal this log store will use */
+ uint32_t order; /* the number of bits used for the sub hashes
+ * as sub hashes will be used as an index for
+ * locating each bucket nb_bucket = 2^order
+ */
+ uint16_t nb_kicks; /* the number of cuckoo hash kicks done */
+ bool *kick_map; /* take care of not doing kick path loops */
+ QCowHashInfo *hash_table; /* nb_buckets * QCOW_LOG_STORE_BUCKET_SIZE */
+ QCowHashInfo *hash_table_copy; /* copy of the hash table for packing */
+
+ /* members required to freeze a log store into a hash store consumable
+ * hash table (incarnation)
+ */
+ uint8_t *write_buf; /* the buffer used to write */
+ uint64_t write_buf_offset; /* the on disk offset of the buffer */
+ uint32_t in_buf_offset; /* the current offset in the write buffer */
+} QCowLogStore;
+
+/* a QCowIncarnation is a frozen QCowLogStore
+ * Freezes are read only and their in ram filters is queried from the youngest
+ * to the oldest in order to know if their hash table contains a given
+ * QCowHashInfo.
+ * If so a read is issued to retrieve the QCowHashInfo.
+ */
+typedef struct QCowIncarnation {
+ uint64_t filter_offset; /* the on disk offset of the ram filter */
+ uint64_t hash_table_offset; /* the on disk offset of the hash table
+ * (should be just after the end of the
filter)
+ */
+ uint64_t size; /* the on disk size of the incarnation */
+ uint8_t *filter; /* an in ram filter */
+ QTAILQ_ENTRY(QCowIncarnation) next;
+} QCowIncarnation;
+
+typedef struct QCowLimbo {
+ uint64_t offset; /* the on disk offset of the to reincarnate
+ * disk space
+ */
+ QTAILQ_ENTRY(QCowLimbo) next;
+} QCowLimbo;
+
+typedef struct {
+ uint32_t order; /* the number of bits used for the sub hashes
+ * as sub hashes will be used as an index for
+ * locating each bucket nb_bucket = 2^order
+ */
+ uint32_t nb_incarnations; /* the current number of incarnations */
+ uint32_t nb_in_limbo; /* the number of dead incarnations */
+ QTAILQ_HEAD(incarnations_head, QCowIncarnation) incarnations;
+ /* a list of frozen hash table
+ * ordered from the youngest
+ * to the oldest
+ */
+ QTAILQ_HEAD(in_limbo_head, QCowLimbo) in_limbo;
+ /* a list of dead incarnation which disk
+ * space can be recycled
+ */
+} QCowHashStore;
+
+typedef struct {
+ uint32_t order;
+ QCowLogStore log_store; /* the current log store */
+ QCowLogStore frozen_log_store; /* the log store to incarnate */
+ bool freezing; /* are we incarnating a log store */
+ QCowHashStore hash_store;
+ CoMutex insert_lock; /* used to prevent multiple freeze
attempts
+ * at the same time
+ */
+} QCowStore;
#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
@@ -117,8 +292,10 @@ enum {
enum {
QCOW2_INCOMPAT_DIRTY_BITNR = 0,
QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+ QCOW2_INCOMPAT_DEDUP_BITNR = 1,
+ QCOW2_INCOMPAT_DEDUP = 1 << QCOW2_INCOMPAT_DEDUP_BITNR,
- QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY,
+ QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY | QCOW2_INCOMPAT_DEDUP,
};
/* Compatible feature bits */
@@ -163,6 +340,28 @@ typedef struct BDRVQcowState {
int64_t free_cluster_index;
int64_t free_byte_offset;
+ bool has_dedup; /* indicate if this image has dedup */
+
+ /* the following fields are saved in QCOW2 dedup header extension */
+ bool dedup_dirty; /* mapped to the header dirty flag */
+ uint64_t dedup_conf_offset; /* disk offset of the dedup config */
+ size_t dedup_conf_size; /* disk size of the dedup config */
+ QCowHashAlgo dedup_hash_algo; /* the cryptographic hash algo used */
+ uint32_t dedup_max_incarnations; /* the maximum number of incarnations
in
+ * hash store -> oldest incarnation
will
+ * be dropped. It's harmless for the
+ * dedup and allow to scale.
+ * The whole thing act as a FIFO or LRU
+ * if value is 0 there is no limits
+ */
+
+ QCowStore key_value_store; /* the key value store used to store
+ * dedup hashes
+ */
+ int freeze_errno; /* catch errors when incarnating */
+ Coroutine *freeze_co; /* coroutine used when freezing */
+ Coroutine *load_filter_co; /* used to load incarnations filters */
+
CoMutex lock;
uint32_t crypt_method; /* current crypt method, 0 if no key yet */
--
1.7.10.4
- [Qemu-devel] [RFC V8 00/24] QCOW2 deduplication core functionality, Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 01/24] qcow2: Add journal specification., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 02/24] qcow2: Add deduplication structures and fields.,
Benoît Canet <=
- [Qemu-devel] [RFC V8 03/24] qcow2: Add journal., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 04/24] qcow2: Create the log store., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 07/24] qcow2: Add qcow2_de dup_read_missing_and_concatenate, Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 08/24] qcow2: Create a way to link to l2 tables when deduplicating., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 06/24] qcow2: Add the dedupl ication store., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 05/24] qcow2: Add the hash s tore., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 09/24] qcow2: Make qcow2_update_cluster_refcount public., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 12/24] qcow2: Do allocate on rewrite on the dedup case., Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 10/24] qcow2: Add qcow2_dedup and related functions, Benoît Canet, 2013/06/20
- [Qemu-devel] [RFC V8 11/24] qcow2: Add qcow2_dedup_store_new_hashes., Benoît Canet, 2013/06/20