qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] VMDK: add monolithic flat image support


From: Fam Zheng
Subject: [Qemu-devel] [PATCH] VMDK: add monolithic flat image support
Date: Mon, 30 May 2011 15:49:28 +0800

VMDK multiple file images can not be recognized for now. This patch is
adding monolithic flat support to it, that is the image type with two
files, one text descriptor file and a plain data file. This type of
image can be created in VMWare, with the options "allocate all disk
space now" and "store virtual disk as a single file" checked.

A VmdkExtent structure is introduced to hold the image "extent"
information, which makes further adding multi extents support of VMDK
easy. An image creating option "flat" is added for creating flat
(preallocated) image.

Signed-off-by: Feiran (Fam) Zheng <address@hidden>
---
 block/vmdk.c |  686 +++++++++++++++++++++++++++++++++++++++++++++-------------
 block_int.h  |    2 +
 qemu-img.c   |   12 +-
 3 files changed, 542 insertions(+), 158 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 8fc9d67..726ad3a 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -30,6 +30,8 @@
 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

+#define VMDK_FLAT_BACKING 0
+
 typedef struct {
     uint32_t version;
     uint32_t flags;
@@ -60,7 +62,10 @@ typedef struct {

 #define L2_CACHE_SIZE 16

-typedef struct BDRVVmdkState {
+typedef struct VmdkExtent {
+    BlockDriverState *file;
+    int flat;
+    int64_t sectors;
     int64_t l1_table_offset;
     int64_t l1_backup_table_offset;
     uint32_t *l1_table;
@@ -73,8 +78,15 @@ typedef struct BDRVVmdkState {
     uint32_t l2_cache_offsets[L2_CACHE_SIZE];
     uint32_t l2_cache_counts[L2_CACHE_SIZE];

-    unsigned int cluster_sectors;
     uint32_t parent_cid;
+    unsigned int cluster_sectors;
+} VmdkExtent;
+
+typedef struct BDRVVmdkState {
+    int has_descriptor_file;
+    int extent_size;
+    int cid_update;
+    VmdkExtent *extents;
 } BDRVVmdkState;

 typedef struct VmdkMetaData {
@@ -88,21 +100,30 @@ typedef struct VmdkMetaData {
 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     uint32_t magic;
-
+    char *cid_p, *ct_p, *extent_p;
+    char cid_str[] = "CID";
+    char ct_str[] = "createType";
+    char extent_str[] = "RW";
     if (buf_size < 4)
         return 0;
     magic = be32_to_cpu(*(uint32_t *)buf);
     if (magic == VMDK3_MAGIC ||
-        magic == VMDK4_MAGIC)
+        magic == VMDK4_MAGIC) {
         return 100;
-    else
+    } else {
+        cid_p = strstr((char *)buf, cid_str);
+        ct_p = strstr((char *)buf, ct_str);
+        extent_p = strstr((char *)buf, extent_str);
+        if (cid_p && ct_p && extent_p)
+            return 100;
+    }
         return 0;
 }

 #define CHECK_CID 1

 #define SECTOR_SIZE 512
-#define DESC_SIZE 20*SECTOR_SIZE       // 20 sectors of 512 bytes each
+#define DESC_SIZE (20 * SECTOR_SIZE)    // 20 sectors of 512 bytes each
 #define HEADER_SIZE 512                        // first sector of 512 bytes

 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
@@ -111,11 +132,11 @@ static uint32_t vmdk_read_cid(BlockDriverState
*bs, int parent)
     uint32_t cid;
     const char *p_name, *cid_str;
     size_t cid_str_size;
+    BDRVVmdkState *s = bs->opaque;
+    int desc_offset = s->has_descriptor_file ? 0 : 0x200;

-    /* the descriptor offset = 0x200 */
-    if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+    if (bdrv_pread(bs->file, desc_offset, desc, DESC_SIZE) != DESC_SIZE)
         return 0;
-
     if (parent) {
         cid_str = "parentCID";
         cid_str_size = sizeof("parentCID");
@@ -128,19 +149,52 @@ static uint32_t vmdk_read_cid(BlockDriverState
*bs, int parent)
         p_name += cid_str_size;
         sscanf(p_name,"%x",&cid);
     }
-
     return cid;
 }

+#ifdef _WIN32
+static int64_t get_file_size(const char *filename)
+{
+    typedef DWORD (WINAPI * get_compressed_t)
+                    (const char *filename, DWORD *high);
+    get_compressed_t get_compressed;
+    struct _stati64 st;
+
+    /* WinNT support GetCompressedFileSize to determine allocate size */
+    get_compressed = (get_compressed_t)
+        GetProcAddress(GetModuleHandle("kernel32"), "GetCompressedFileSizeA");
+    if (get_compressed) {
+        DWORD high, low;
+        low = get_compressed(filename, &high);
+        if (low != 0xFFFFFFFFlu || GetLastError() == NO_ERROR)
+        return (((int64_t) high) << 32) + low;
+    }
+
+    if (_stati64(filename, &st) < 0)
+        return -1;
+    return st.st_size;
+}
+#else
+static int64_t get_file_size(const char *filename)
+{
+    struct stat st;
+    if (stat(filename, &st) < 0)
+        return -1;
+    return st.st_size;
+}
+#endif
+
 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
 {
     char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
     char *p_name, *tmp_str;
-
-    /* the descriptor offset = 0x200 */
-    if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+    BDRVVmdkState * s = bs->opaque;
+    int desc_offset = s->has_descriptor_file ? 0 : 0x200;
+    int desc_size = s->has_descriptor_file ?
+        get_file_size(bs->file->filename) : DESC_SIZE;
+    memset(desc, 0, DESC_SIZE);
+    if (bdrv_pread(bs->file, desc_offset, desc, desc_size) != desc_size)
         return -1;
-
     tmp_str = strstr(desc,"parentCID");
     pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
     if ((p_name = strstr(desc,"CID")) != NULL) {
@@ -149,21 +203,20 @@ static int vmdk_write_cid(BlockDriverState *bs,
uint32_t cid)
         pstrcat(desc, sizeof(desc), tmp_desc);
     }

-    if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
+    if (bdrv_pwrite_sync(bs->file, desc_offset, desc, desc_size) < 0)
         return -1;
     return 0;
 }

-static int vmdk_is_cid_valid(BlockDriverState *bs)
+static int vmdk_is_cid_valid(BlockDriverState *bs, VmdkExtent *extent)
 {
 #ifdef CHECK_CID
-    BDRVVmdkState *s = bs->opaque;
     BlockDriverState *p_bs = bs->backing_hd;
     uint32_t cur_pcid;

     if (p_bs) {
-        cur_pcid = vmdk_read_cid(p_bs,0);
-        if (s->parent_cid != cur_pcid)
+        cur_pcid = vmdk_read_cid(p_bs, 0);
+        if (extent->parent_cid != cur_pcid)
             // CID not valid
             return 0;
     }
@@ -198,7 +251,9 @@ static int vmdk_snapshot_create(const char
*filename, const char *backing_file)
     "#DDB\n"
     "\n";

-    snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY |
O_LARGEFILE, 0644);
+    snp_fd = open(filename,
+                O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                0644);
     if (snp_fd < 0)
         return -errno;
     p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
@@ -277,7 +332,8 @@ static int vmdk_snapshot_create(const char
*filename, const char *backing_file)
      * Each GDE span 32M disk, means:
      * 512 GTE per GT, each GTE points to grain
      */
-    gt_size = (int64_t)header.num_gtes_per_gte * header.granularity *
SECTOR_SIZE;
+    gt_size =
+        (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
     if (!gt_size) {
         ret = -EINVAL;
         goto fail;
@@ -338,9 +394,9 @@ static int vmdk_parent_open(BlockDriverState *bs)
 {
     char *p_name;
     char desc[DESC_SIZE];
-
-    /* the descriptor offset = 0x200 */
-    if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
+    BDRVVmdkState *s = bs->opaque;
+    int desc_offset = s->has_descriptor_file ? 0 : 0x200;
+    if (bdrv_pread(bs->file, desc_offset, desc, DESC_SIZE) != DESC_SIZE)
         return -1;

     if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
@@ -358,106 +414,242 @@ static int vmdk_parent_open(BlockDriverState *bs)
     return 0;
 }

+/* find an option value out of descriptor file */
+static int vmdk_parse_description(const char *desc, const char *opt_name,
+        char *buf, int buf_size)
+{
+    char *opt_pos = strstr(desc, opt_name);
+    int r;
+    if (!opt_pos) return -1;
+    opt_pos += strlen(opt_name) + 2;
+    r = sscanf(opt_pos, "%[^\"]s", buf);
+    assert(r <= buf_size);
+    return r <= 0;
+}
+
+static int vmdk_parse_extents(const char *desc, VmdkExtent extents[],
+        const char *desc_file_path)
+{
+    const char *p = desc;
+    int ret = 0;
+    int r;
+    while (*p) {
+        if (strncmp(p, "RW", strlen("RW")) == 0) {
+            /* parse extent line:
+             * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
+             * or
+             * RW [size in sectors] SPARSE "file-name.vmdk"
+             */
+            char access[11] = "";
+            int64_t sectors = 0;
+            char type[11] = "";
+            char fname[512] = "";
+            sscanf(p, "%10s %lld %10s \"%[^\"]512s\"",
+                    access, &sectors, type, fname);
+            if (!(strlen(access) && sectors && strlen(type)
+                        && strlen(fname)))
+                goto cont;
+            if (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) goto cont;
+            if (strcmp(access, "RW")) goto cont;
+
+            ret++;
+            if (!extents) goto cont;
+
+            /* save to extents array */
+            if (!strcmp(type, "FLAT")) {
+                /* FLAT extent */
+                char extent_path[1024];
+                path_combine(extent_path, sizeof(extent_path),
+                        desc_file_path, fname);
+                VmdkExtent *ext = &extents[ret - 1];
+                ext->flat = 1;
+                ext->sectors = sectors;
+                ext->cluster_sectors = sectors;
+                ext->file = bdrv_new("");
+                if (!ext->file) return -1;
+                r = bdrv_open(ext->file,
+                    extent_path,
+                    BDRV_O_RDWR | BDRV_O_NO_BACKING,
+                    NULL);
+                if (r) return -1;
+            } else {
+                /* SPARSE extent, should not be here */
+                assert(0 && "not supported");
+            }
+
+        }
+cont:
+        /* move to next line */
+        while (*p && *p != '\n') p++;
+        p++;
+
+    }
+    return ret;
+}
+
 static int vmdk_open(BlockDriverState *bs, int flags)
 {
     BDRVVmdkState *s = bs->opaque;
     uint32_t magic;
     int l1_size, i;
-
+    VmdkExtent *extent = NULL;
     if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
         goto fail;

     magic = be32_to_cpu(magic);
     if (magic == VMDK3_MAGIC) {
         VMDK3Header header;
-
-        if (bdrv_pread(bs->file, sizeof(magic), &header,
sizeof(header)) != sizeof(header))
+        s->extents = qemu_mallocz(sizeof(VmdkExtent));
+        s->extent_size = 1;
+        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
+            != sizeof(header))
             goto fail;
-        s->cluster_sectors = le32_to_cpu(header.granularity);
-        s->l2_size = 1 << 9;
-        s->l1_size = 1 << 6;
-        bs->total_sectors = le32_to_cpu(header.disk_sectors);
-        s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
-        s->l1_backup_table_offset = 0;
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
+        extent = s->extents;
+        extent->flat = 0;
+        extent->file = bs->file;
+        extent->cluster_sectors = le32_to_cpu(header.granularity);
+        extent->l2_size = 1 << 9;
+        extent->l1_size = 1 << 6;
+        extent->sectors = le32_to_cpu(header.disk_sectors);
+        extent->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
+        extent->l1_backup_table_offset = 0;
+        extent->l1_entry_sectors = extent->l2_size * extent->cluster_sectors;
     } else if (magic == VMDK4_MAGIC) {
         VMDK4Header header;
-
-        if (bdrv_pread(bs->file, sizeof(magic), &header,
sizeof(header)) != sizeof(header))
+        s->extents = qemu_mallocz(sizeof(VmdkExtent));
+        s->extent_size = 1;
+        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
+            != sizeof(header))
             goto fail;
-        bs->total_sectors = le64_to_cpu(header.capacity);
-        s->cluster_sectors = le64_to_cpu(header.granularity);
-        s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
-        s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
-        if (s->l1_entry_sectors <= 0)
+        extent = s->extents;
+        extent->file = bs->file;
+        extent->sectors = le64_to_cpu(header.capacity);
+        extent->cluster_sectors = le64_to_cpu(header.granularity);
+        extent->l2_size = le32_to_cpu(header.num_gtes_per_gte);
+        extent->l1_entry_sectors = extent->l2_size * extent->cluster_sectors;
+        if (extent->l1_entry_sectors <= 0)
             goto fail;
-        s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
-            / s->l1_entry_sectors;
-        s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
-        s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
+        extent->l1_size = (extent->sectors + extent->l1_entry_sectors - 1)
+            / extent->l1_entry_sectors;
+        extent->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
+        extent->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;

         // try to open parent images, if exist
         if (vmdk_parent_open(bs) != 0)
             goto fail;
         // write the CID once after the image creation
-        s->parent_cid = vmdk_read_cid(bs,1);
+        extent->parent_cid = vmdk_read_cid(bs,1);
     } else {
+        char buf[2048];
+        char ct[128];
+        if (bdrv_pread(bs->file, 0, buf, sizeof(buf)) == 0)
+            goto fail;
+        if (0 != vmdk_parse_description(buf, "createType", ct, sizeof(ct)))
+            goto fail;
+        if (0 != strcmp(ct, "monolithicFlat"))
+            goto fail;
+        bs->disk_size += get_file_size(bs->file->filename);
+        s->has_descriptor_file = 1;
+        s->extent_size = vmdk_parse_extents(buf, NULL, NULL);
+        if  (!s->extent_size)
+            goto fail;
+        s->extents = qemu_mallocz(s->extent_size * sizeof(VmdkExtent));
+        extent = s->extents;
+        vmdk_parse_extents(buf, s->extents, bs->file->filename);
+        bs->total_sectors = 0;
+
+        // try to open parent images, if exist
+        if (vmdk_parent_open(bs) != 0)
         goto fail;
+        // write the CID once after the image creation
+        extent->parent_cid = vmdk_read_cid(bs,1);
+
+        for (i = 0; i < s->extent_size; i++)
+        {
+            bs->disk_size += get_file_size(s->extents[i].file->filename);
+            bs->total_sectors += s->extents[i].sectors;
+        }
+        return 0;
+    }
+
+    bs->total_sectors = 0;
+    for (i = 0; i < s->extent_size; i++)
+    {
+        bs->disk_size += get_file_size(s->extents[i].file->filename);
+        bs->total_sectors += s->extents[i].sectors;
     }

     /* read the L1 table */
-    l1_size = s->l1_size * sizeof(uint32_t);
-    s->l1_table = qemu_malloc(l1_size);
-    if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
l1_size) != l1_size)
+    l1_size = extent->l1_size * sizeof(uint32_t);
+    extent->l1_table = qemu_malloc(l1_size);
+    if (bdrv_pread(bs->file,
+            extent->l1_table_offset,
+            extent->l1_table,
+            l1_size)
+        != l1_size)
         goto fail;
-    for(i = 0; i < s->l1_size; i++) {
-        le32_to_cpus(&s->l1_table[i]);
+    for(i = 0; i < extent->l1_size; i++) {
+        le32_to_cpus(&extent->l1_table[i]);
     }

-    if (s->l1_backup_table_offset) {
-        s->l1_backup_table = qemu_malloc(l1_size);
-        if (bdrv_pread(bs->file, s->l1_backup_table_offset,
s->l1_backup_table, l1_size) != l1_size)
+    if (extent->l1_backup_table_offset) {
+        extent->l1_backup_table = qemu_malloc(l1_size);
+        if (bdrv_pread(bs->file,
+                extent->l1_backup_table_offset,
+                extent->l1_backup_table,
+                l1_size)
+            != l1_size)
             goto fail;
-        for(i = 0; i < s->l1_size; i++) {
-            le32_to_cpus(&s->l1_backup_table[i]);
+        for(i = 0; i < extent->l1_size; i++) {
+            le32_to_cpus(&extent->l1_backup_table[i]);
         }
     }

-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
+    extent->l2_cache =
+        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
     return 0;
  fail:
-    qemu_free(s->l1_backup_table);
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+    if (extent) {
+        qemu_free(extent->l1_backup_table);
+        qemu_free(extent->l1_table);
+        qemu_free(extent->l2_cache);
+    }
     return -1;
 }

-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
-                                   uint64_t offset, int allocate);
-
-static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
-                             uint64_t offset, int allocate)
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+                    VmdkExtent *extent,
+                    VmdkMetaData *m_data,
+                    uint64_t offset,
+                    int allocate);
+
+static int get_whole_cluster(BlockDriverState *bs,
+                VmdkExtent *extent,
+                uint64_t cluster_offset,
+                uint64_t offset,
+                int allocate)
 {
-    BDRVVmdkState *s = bs->opaque;
-    uint8_t  whole_grain[s->cluster_sectors*512];        // 128
sectors * 512 bytes each = grain size 64KB
+    // 128 sectors * 512 bytes each = grain size 64KB
+    uint8_t  whole_grain[extent->cluster_sectors * 512];

     // we will be here if it's first write on non-exist grain(cluster).
     // try to read from parent image, if exist
     if (bs->backing_hd) {
         int ret;

-        if (!vmdk_is_cid_valid(bs))
+        if (!vmdk_is_cid_valid(bs, extent))
             return -1;
-
+        // floor offset to cluster
+        offset -= offset % (extent->cluster_sectors * 512);
         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
-            s->cluster_sectors);
+                extent->cluster_sectors);
         if (ret < 0) {
             return -1;
         }

         //Write grain only into the active image
-        ret = bdrv_write(bs->file, cluster_offset, whole_grain,
-            s->cluster_sectors);
+        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
+                extent->cluster_sectors);
         if (ret < 0) {
             return -1;
         }
@@ -465,52 +657,62 @@ static int get_whole_cluster(BlockDriverState
*bs, uint64_t cluster_offset,
     return 0;
 }

-static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
+static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
 {
-    BDRVVmdkState *s = bs->opaque;
-
+    if (extent->flat) return -1;
     /* update L2 table */
-    if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(m_data->offset)),
-                    &(m_data->offset), sizeof(m_data->offset)) < 0)
+    if (bdrv_pwrite_sync(
+            extent->file,
+            ((int64_t)m_data->l2_offset * 512)
+                + (m_data->l2_index * sizeof(m_data->offset)),
+            &(m_data->offset),
+            sizeof(m_data->offset)
+        ) < 0)
         return -1;
     /* update backup L2 table */
-    if (s->l1_backup_table_offset != 0) {
-        m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
-        if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset *
512) + (m_data->l2_index * sizeof(m_data->offset)),
-                        &(m_data->offset), sizeof(m_data->offset)) < 0)
+    if (extent->l1_backup_table_offset != 0) {
+        m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
+        if (bdrv_pwrite_sync(
+                extent->file,
+                ((int64_t)m_data->l2_offset * 512)
+                    + (m_data->l2_index * sizeof(m_data->offset)),
+                &(m_data->offset), sizeof(m_data->offset)
+            ) < 0)
             return -1;
     }

     return 0;
 }

-static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+                                    VmdkExtent *extent,
+                                    VmdkMetaData *m_data,
                                    uint64_t offset, int allocate)
 {
-    BDRVVmdkState *s = bs->opaque;
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
     uint32_t min_count, *l2_table, tmp = 0;
     uint64_t cluster_offset;

+    if (extent->flat) return 0;
     if (m_data)
         m_data->valid = 0;

-    l1_index = (offset >> 9) / s->l1_entry_sectors;
-    if (l1_index >= s->l1_size)
+    l1_index = (offset >> 9) / extent->l1_entry_sectors;
+    if (l1_index >= extent->l1_size)
         return 0;
-    l2_offset = s->l1_table[l1_index];
+    l2_offset = extent->l1_table[l1_index];
     if (!l2_offset)
         return 0;
     for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
+        if (l2_offset == extent->l2_cache_offsets[i]) {
             /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
+            if (++extent->l2_cache_counts[i] == 0xffffffff) {
                 for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
+                    extent->l2_cache_counts[j] >>= 1;
                 }
             }
-            l2_table = s->l2_cache + (i * s->l2_size);
+            l2_table = extent->l2_cache + (i * extent->l2_size);
             goto found;
         }
     }
@@ -518,20 +720,24 @@ static uint64_t
get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
     min_index = 0;
     min_count = 0xffffffff;
     for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
+        if (extent->l2_cache_counts[i] < min_count) {
+            min_count = extent->l2_cache_counts[i];
             min_index = i;
         }
     }
-    l2_table = s->l2_cache + (min_index * s->l2_size);
-    if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table,
s->l2_size * sizeof(uint32_t)) !=
-
  s->l2_size * sizeof(uint32_t))
+    l2_table = extent->l2_cache + (min_index * extent->l2_size);
+    if (bdrv_pread(
+            extent->file,
+            (int64_t)l2_offset * 512,
+            l2_table,
+            extent->l2_size * sizeof(uint32_t)
+        ) != extent->l2_size * sizeof(uint32_t))
         return 0;

-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
+    extent->l2_cache_offsets[min_index] = l2_offset;
+    extent->l2_cache_counts[min_index] = 1;
  found:
-    l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
+    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     cluster_offset = le32_to_cpu(l2_table[l2_index]);

     if (!cluster_offset) {
@@ -539,8 +745,11 @@ static uint64_t
get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
             return 0;

         // Avoid the L2 tables update for the images that have snapshots.
-        cluster_offset = bdrv_getlength(bs->file);
-        bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
+        cluster_offset = bdrv_getlength(extent->file);
+        bdrv_truncate(
+            extent->file,
+            cluster_offset + (extent->cluster_sectors << 9)
+        );

         cluster_offset >>= 9;
         tmp = cpu_to_le32(cluster_offset);
@@ -551,7 +760,8 @@ static uint64_t
get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
          * This problem may occur because of insufficient space on host disk
          * or inappropriate VM shutdown.
          */
-        if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
+        if (get_whole_cluster(
+                bs, extent, cluster_offset, offset, allocate) == -1)
             return 0;

         if (m_data) {
@@ -570,35 +780,68 @@ static int vmdk_is_allocated(BlockDriverState
*bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
 {
     BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-    index_in_cluster = sector_num % s->cluster_sectors;
-    n = s->cluster_sectors - index_in_cluster;
+    int ext_idx = 0;
+    int index_in_cluster, n, ret;
+    uint64_t offset;
+    VmdkExtent *extent;
+    while (ext_idx < s->extent_size
+            && sector_num >= s->extents[ext_idx].sectors) {
+        sector_num -= s->extents[ext_idx].sectors;
+        ext_idx++;
+    }
+    if (ext_idx == s->extent_size) return 0;
+    extent = &s->extents[ext_idx];
+    if (s->extents[ext_idx].flat) {
+        n = extent->sectors - sector_num;
+        ret = 1;
+    } else {
+        offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
+        ret = offset ? 1 : 0;
+    }
     if (n > nb_sectors)
         n = nb_sectors;
     *pnum = n;
-    return (cluster_offset != 0);
+    return ret;
 }

 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
-    int index_in_cluster, n, ret;
+    int ext_idx = 0;
+    int ret;
+    uint64_t n, index_in_cluster;
+    VmdkExtent *extent;
     uint64_t cluster_offset;

     while (nb_sectors > 0) {
-        cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
-        index_in_cluster = sector_num % s->cluster_sectors;
-        n = s->cluster_sectors - index_in_cluster;
+        while (ext_idx < s->extent_size
+                && sector_num >= s->extents[ext_idx].sectors) {
+            sector_num -= s->extents[ext_idx].sectors;
+            ext_idx++;
+        }
+        if (ext_idx == s->extent_size) {
+            return -1;
+        }
+        extent = &s->extents[ext_idx];
+        if (extent->flat)
+            cluster_offset = 0;
+        else
+            cluster_offset = get_cluster_offset(
+                                bs, extent, NULL, sector_num << 9, 0);
+
+        cluster_offset =
+            get_cluster_offset(bs, extent, NULL, sector_num << 9, 0);
+        index_in_cluster = sector_num % extent->cluster_sectors;
+        n = extent->cluster_sectors - index_in_cluster;
         if (n > nb_sectors)
             n = nb_sectors;
-        if (!cluster_offset) {
+        if (!extent->flat && !cluster_offset) {
             // try to read from parent image, if exist
             if (bs->backing_hd) {
-                if (!vmdk_is_cid_valid(bs))
+                if (!vmdk_is_cid_valid(bs, extent))
                     return -1;
                 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
                 if (ret < 0)
@@ -607,7 +850,12 @@ static int vmdk_read(BlockDriverState *bs,
int64_t sector_num,
                 memset(buf, 0, 512 * n);
             }
         } else {
-            if(bdrv_pread(bs->file, cluster_offset + index_in_cluster
* 512, buf, n * 512) != n * 512)
+            if(bdrv_pread(
+                    extent->file,
+                    cluster_offset + index_in_cluster * 512,
+                    buf,
+                    n * 512
+                ) != n * 512)
                 return -1;
         }
         nb_sectors -= n;
@@ -621,11 +869,11 @@ static int vmdk_write(BlockDriverState *bs,
int64_t sector_num,
                      const uint8_t *buf, int nb_sectors)
 {
     BDRVVmdkState *s = bs->opaque;
-    VmdkMetaData m_data;
-    int index_in_cluster, n;
+    VmdkExtent *extent;
+    int ext_idx = 0;
+    int n, index_in_cluster;
     uint64_t cluster_offset;
-    static int cid_update = 0;
-
+    VmdkMetaData m_data;
     if (sector_num > bs->total_sectors) {
         fprintf(stderr,
                 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
@@ -635,19 +883,40 @@ static int vmdk_write(BlockDriverState *bs,
int64_t sector_num,
     }

     while (nb_sectors > 0) {
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
+        while (ext_idx < s->extent_size
+            && sector_num >= s->extents[ext_idx].sectors) {
+            sector_num -= s->extents[ext_idx].sectors;
+            ext_idx++;
+        }
+        if (ext_idx == s->extent_size) {
+            return -1;
+        }
+        extent = &s->extents[ext_idx];
+        if (extent->flat)
+            cluster_offset = 0;
+        else
+            cluster_offset = get_cluster_offset(
+                                bs,
+                                extent,
+                                &m_data,
+                                sector_num << 9, 1);
+
+        if (!extent->flat && !cluster_offset)
+            return -1;
+        index_in_cluster = sector_num & (extent->cluster_sectors - 1);
+        n = extent->cluster_sectors - index_in_cluster;
         if (n > nb_sectors)
             n = nb_sectors;
-        cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
-        if (!cluster_offset)
-            return -1;

-        if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster *
512, buf, n * 512) != n * 512)
+        if (bdrv_pwrite(
+                extent->file,
+                cluster_offset + index_in_cluster * 512,
+                buf, n * 512
+            ) != n * 512)
             return -1;
-        if (m_data.valid) {
+        if (!extent->flat && m_data.valid) {
             /* update L2 tables */
-            if (vmdk_L2update(bs, &m_data) == -1)
+            if (vmdk_L2update(extent, &m_data) == -1)
                 return -1;
         }
         nb_sectors -= n;
@@ -655,9 +924,9 @@ static int vmdk_write(BlockDriverState *bs,
int64_t sector_num,
         buf += n * 512;

         // update CID on the first write every time the virtual disk is opened
-        if (!cid_update) {
+        if (!s->cid_update) {
             vmdk_write_cid(bs, time(NULL));
-            cid_update++;
+            s->cid_update++;
         }
     }
     return 0;
@@ -665,32 +934,17 @@ static int vmdk_write(BlockDriverState *bs,
int64_t sector_num,

 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
 {
-    int fd, i;
+    int fd;
+    int64_t i;
     VMDK4Header header;
     uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
-    static const char desc_template[] =
-        "# Disk DescriptorFile\n"
-        "version=1\n"
-        "CID=%x\n"
-        "parentCID=ffffffff\n"
-        "createType=\"monolithicSparse\"\n"
-        "\n"
-        "# Extent description\n"
-        "RW %" PRId64 " SPARSE \"%s\"\n"
-        "\n"
-        "# The Disk Data Base \n"
-        "#DDB\n"
-        "\n"
-        "ddb.virtualHWVersion = \"%d\"\n"
-        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
-        "ddb.geometry.heads = \"16\"\n"
-        "ddb.geometry.sectors = \"63\"\n"
-        "ddb.adapterType = \"ide\"\n";
+
     char desc[1024];
     const char *real_filename, *temp_str;
     int64_t total_size = 0;
     const char *backing_file = NULL;
     int flags = 0;
+    int flat = 0;
     int ret;

     // Read out options
@@ -701,16 +955,125 @@ static int vmdk_create(const char *filename,
QEMUOptionParameter *options)
             backing_file = options->value.s;
         } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
             flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
+        } else if (!strcmp(options->name, BLOCK_OPT_FLAT)) {
+            flat = options->value.n;
         }
         options++;
     }

+    if (flat) {
+        const char desc_template[] =
+        "# Disk DescriptorFile\n"
+        "version=1\n"
+        "CID=%x\n"
+        "parentCID=ffffffff\n"
+        "createType=\"monolithicFlat\"\n"
+        "\n"
+        "# Extent description\n"
+        "RW %" PRId64 " FLAT \"%s\" 0\n"
+        "\n"
+        "# The Disk Data Base \n"
+        "#DDB\n"
+        "\n"
+        "ddb.virtualHWVersion = \"%d\"\n"
+        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
+        "ddb.geometry.heads = \"16\"\n"
+        "ddb.geometry.sectors = \"63\"\n"
+        "ddb.adapterType = \"ide\"\n";
+        char ext_filename[1024];
+        strncpy(ext_filename, filename, 1024);
+        ext_filename[1023] = '\0';
+        if (!strcmp(&ext_filename[strlen(ext_filename) - 5], ".vmdk"))
+            strcpy(&ext_filename[strlen(ext_filename) - 5], "-flat.vmdk");
+        else
+            strcat(ext_filename, "-flat.vmdk");
+        /* create extent first */
+        fd = open(
+                ext_filename,
+                O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                0644);
+        if (fd < 0)
+            return -errno;
+        ret = ftruncate(fd, total_size * 512);
+        if (ret) goto exit;
+#if VMDK_FLAT_BACKING
+        if (backing_file) {
+            uint8_t buf[512];
+            BlockDriverState *bf = bdrv_new("");
+            if (!bf) {
+                ret = -1;
+                goto exit;
+            }
+            ret = bdrv_open(bf, backing_file, BDRV_O_RDWR, NULL);
+            if (ret) {
+                bdrv_delete(bf);
+                goto exit;
+            }
+            for (i = 0; i < total_size; i++) {
+                int num;
+                if (bdrv_is_allocated(bf, i, 1, &num)) {
+                    ret = bdrv_read(bf, i, buf, 1);
+                    if (ret)
+                    {
+                        bdrv_delete(bf);
+                        goto exit;
+                    }
+                    ret = pwrite(fd, buf, 512, i * 512);
+                    if (ret != 512)
+                    {
+                        bdrv_delete(bf);
+                        goto exit;
+                    }
+                }
+            }
+        }
+#endif
+        close(fd);
+
+        /* generate descriptor file */
+        snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
+                 total_size, ext_filename,
+                 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
+                 total_size / (int64_t)(63 * 16));
+        fd = open(
+                filename,
+                O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                0644);
+        if (fd < 0)
+            return -errno;
+        ret = qemu_write_full(fd, desc, strlen(desc));
+        if (ret != strlen(desc)) {
+            ret = -errno;
+            goto exit;
+        }
+        ret = 0;
+    } else {
+        const char desc_template[] =
+        "# Disk DescriptorFile\n"
+        "version=1\n"
+        "CID=%x\n"
+        "parentCID=ffffffff\n"
+        "createType=\"monolithicSparse\"\n"
+        "\n"
+        "# Extent description\n"
+        "RW %" PRId64 " SPARSE \"%s\"\n"
+        "\n"
+        "# The Disk Data Base \n"
+        "#DDB\n"
+        "\n"
+        "ddb.virtualHWVersion = \"%d\"\n"
+        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
+        "ddb.geometry.heads = \"16\"\n"
+        "ddb.geometry.sectors = \"63\"\n"
+        "ddb.adapterType = \"ide\"\n";
     /* XXX: add support for backing file */
     if (backing_file) {
         return vmdk_snapshot_create(filename, backing_file);
     }

-    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+        fd = open(
+            filename,
+            O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
               0644);
     if (fd < 0)
         return -errno;
@@ -724,7 +1087,8 @@ static int vmdk_create(const char *filename,
QEMUOptionParameter *options)

     grains = (total_size + header.granularity - 1) / header.granularity;
     gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
-    gt_count = (grains + header.num_gtes_per_gte - 1) /
header.num_gtes_per_gte;
+        gt_count =
+            (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
     gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;

     header.desc_offset = 1;
@@ -807,8 +1171,8 @@ static int vmdk_create(const char *filename,
QEMUOptionParameter *options)
         ret = -errno;
         goto exit;
     }
-
     ret = 0;
+    }
 exit:
     close(fd);
     return ret;
@@ -817,14 +1181,20 @@ exit:
 static void vmdk_close(BlockDriverState *bs)
 {
     BDRVVmdkState *s = bs->opaque;
-
-    qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+    if (s->extent_size)
+        qemu_free(s->extents);
 }

 static int vmdk_flush(BlockDriverState *bs)
 {
-    return bdrv_flush(bs->file);
+    int ret = 0;
+    int i;
+    BDRVVmdkState *s = bs->opaque;
+    ret = bdrv_flush(bs->file);
+    for (i = 0; i <  s->extent_size; i++)
+        ret |= bdrv_flush(s->extents[i].file);
+    return ret;
+
 }


@@ -844,6 +1214,11 @@ static QEMUOptionParameter vmdk_create_options[] = {
         .type = OPT_FLAG,
         .help = "VMDK version 6 image"
     },
+    {
+        .name = BLOCK_OPT_FLAT,
+        .type = OPT_FLAG,
+        .help = "VMDK flat extent image"
+    },
     { NULL }
 };

@@ -862,6 +1237,7 @@ static BlockDriver bdrv_vmdk = {
     .create_options = vmdk_create_options,
 };

+
 static void bdrv_vmdk_init(void)
 {
     bdrv_register(&bdrv_vmdk);
diff --git a/block_int.h b/block_int.h
index fa91337..8df11dd 100644
--- a/block_int.h
+++ b/block_int.h
@@ -34,6 +34,7 @@
 #define BLOCK_OPT_SIZE          "size"
 #define BLOCK_OPT_ENCRYPT       "encryption"
 #define BLOCK_OPT_COMPAT6       "compat6"
+#define BLOCK_OPT_FLAT          "flat"
 #define BLOCK_OPT_BACKING_FILE  "backing_file"
 #define BLOCK_OPT_BACKING_FMT   "backing_fmt"
 #define BLOCK_OPT_CLUSTER_SIZE  "cluster_size"
@@ -143,6 +144,7 @@ struct BlockDriver {
 struct BlockDriverState {
     int64_t total_sectors; /* if we are reading a disk image, give its
                               size in sectors */
+    int64_t disk_size; /* if non-zero, holds the total disk size */
     int read_only; /* if true, the media is read only */
     int keep_read_only; /* if true, the media was requested to stay
read only */
     int open_flags; /* flags used to open the file, re-used for re-open */
diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..0c6c0fb 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -980,12 +980,15 @@ out:
 }

 #ifdef _WIN32
-static int64_t get_allocated_file_size(const char *filename)
+static int64_t get_allocated_file_size(
+        BlockDriverState *bs, const char *filename)
 {
     typedef DWORD (WINAPI * get_compressed_t)(const char *filename,
DWORD *high);
     get_compressed_t get_compressed;
     struct _stati64 st;

+    if (bs->disk_size)
+        return bs->disk_size;
     /* WinNT support GetCompressedFileSize to determine allocate size */
     get_compressed = (get_compressed_t)
GetProcAddress(GetModuleHandle("kernel32"), "GetCompressedFileSizeA");
     if (get_compressed) {
@@ -1000,9 +1003,12 @@ static int64_t get_allocated_file_size(const
char *filename)
     return st.st_size;
 }
 #else
-static int64_t get_allocated_file_size(const char *filename)
+static int64_t get_allocated_file_size(
+        BlockDriverState *bs, const char *filename)
 {
     struct stat st;
+    if (bs->disk_size)
+        return bs->disk_size;
     if (stat(filename, &st) < 0)
         return -1;
     return (int64_t)st.st_blocks * 512;
@@ -1067,7 +1073,7 @@ static int img_info(int argc, char **argv)
     bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
     bdrv_get_geometry(bs, &total_sectors);
     get_human_readable_size(size_buf, sizeof(size_buf), total_sectors * 512);
-    allocated_size = get_allocated_file_size(filename);
+    allocated_size = get_allocated_file_size(bs, filename);
     if (allocated_size < 0) {
         snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
     } else {



reply via email to

[Prev in Thread] Current Thread [Next in Thread]