qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [v4] Support vhd type VHD_DIFFERENCING


From: Xiaodong Gong
Subject: Re: [Qemu-devel] [v4] Support vhd type VHD_DIFFERENCING
Date: Tue, 23 Sep 2014 22:25:42 +0800

This structure is not documented in the official VHD specs, nor is the 'tdbatmap'
cookie used in differencing VHDs created by Windows Server 2012. Isn't Hyper-V
compatibility desired?

Yes, the tdbatmap structure is an extra feature that comes from Xen's vhd-util. Here we
could make sure that the VHD version is greater than 1.2; I'll add that check in v5.

Aren't those checks required no matter what platform specific locator is used?

Why not? IMO, w2ru (relative paths) support MUST be added before merging this
feature.

I think getting a licence for win2008 (or other Windows versions) is a problem for me, so it is hard to
test this in my environment. Maybe this patch needs a co-author; would you be willing?

What if we have a chain of differencing VHD images and the immediate parent
does not have the required sector either, which may be at an upper level?
Shouldn't this method be called recursively in this case?

Exactly: vpc_open and vpc_read should both support a chain of snapshots.
But because there is no cache for the BAT table and the bitmap, snapshot performance is
already really hard to accept. So I think support for a chain is better added after we
have a cache.

On Mon, Sep 22, 2014 at 6:26 PM, Lucian Petrut <address@hidden> wrote:
From: =?ISO-8859-1?B?MjFH?= <address@hidden>

Now qemu only supports vhd type VHD_FIXED and VHD_DYNAMIC,
so qemu can't read snapshot volume of vhd, and can't support
other storage features of vhd file.

This patch add read parent information in function "vpc_open",
read bitmap in "vpc_read", and change bitmap in "vpc_write".

Signed-off-by: Xiaodong Gong <address@hidden>
---
> Now qemu only supports vhd type VHD_FIXED and VHD_DYNAMIC,
> so qemu can't read snapshot volume of vhd, and can't support
> other storage features of vhd file.
>
> This patch add read parent information in function "vpc_open",
> read bitmap in "vpc_read", and change bitmap in "vpc_write".
>
> Signed-off-by: Xiaodong Gong <address@hidden>
> ---
>  block/vpc.c               | 355 ++++++++++++++++++++++++++++++++++++----------
>  include/block/block_int.h |   6 +-
>  2 files changed, 288 insertions(+), 73 deletions(-)
>
> diff --git a/block/vpc.c b/block/vpc.c
> index c024b4c..2ff2bba 100644
> --- a/block/vpc.c
> +++ b/block/vpc.c
> @@ -33,13 +33,18 @@
>  /**************************************************************/
>
>  #define HEADER_SIZE 512
> +#define DYNAMIC_HEADER_SIZE 1024
> +#define PARENT_LOCATOR_NUM 8
> +#define PARENT_PREFIX_LEN 7 /* such as file:// */
> +#define TBBATMAP_HEAD_SIZE 28
> +#define PLATFORM_MACX 0x5863614d /* big endian */
>
>  //#define CACHE
>
>  enum vhd_type {
>      VHD_FIXED           = 2,
>      VHD_DYNAMIC         = 3,
> -    VHD_DIFFERENCING    = 4,
> +    VHD_DIFF            = 4,
>  };
>
>  // Seconds since Jan 1, 2000 0:00:00 (UTC)
> @@ -138,6 +143,15 @@ typedef struct BDRVVPCState {
>      Error *migration_blocker;
>  } BDRVVPCState;
>
> +typedef struct vhd_tdbatmap_header {
> +    char magic[8]; /* always "tdbatmap" */
> +
> +    uint64_t batmap_offset;
> +    uint32_t batmap_size;
> +    uint32_t batmap_version;
> +    uint32_t checksum;
> +} QEMU_PACKED VHDTdBatmapHeader;
> +

This structure is not documented in the official VHD specs, nor is the 'tdbatmap'
cookie used in differencing VHDs created by Windows Server 2012. Isn't Hyper-V
compatibility desired?

>  static uint32_t vpc_checksum(uint8_t* buf, size_t size)
>  {
>      uint32_t res = 0;
> @@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
>  static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
>  {
>      if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
> -   return 100;
> +        return 100;
>      return 0;
>  }
>
> @@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
>      int i;
>      VHDFooter *footer;
>      VHDDynDiskHeader *dyndisk_header;
> -    uint8_t buf[HEADER_SIZE];
> +    uint8_t buf[DYNAMIC_HEADER_SIZE];
> +    uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];
>      uint32_t checksum;
>      uint64_t computed_size;
> -    int disk_type = VHD_DYNAMIC;
> +    uint32_t disk_type;
>      int ret;
> +    VHDTdBatmapHeader *tdbatmap_header;
> +    int parent_locator_offset = 0;
> +    int64_t data_offset = 0;
> +    int data_length = 0;
> +    uint32_t platform;
>
>      ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
>      if (ret < 0) {
> @@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
>      }
>
>      footer = (VHDFooter *) s->footer_buf;
> +    disk_type = be32_to_cpu(footer->type);
> +
>      if (strncmp(footer->creator, "conectix", 8)) {
>          int64_t offset = bdrv_getlength(bs->file);
>          if (offset < 0) {
> @@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
>          goto fail;
>      }
>
> -    if (disk_type == VHD_DYNAMIC) {
> +    if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {
>          ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
> -                         HEADER_SIZE);
> +                         DYNAMIC_HEADER_SIZE);
>          if (ret < 0) {
>              goto fail;
>          }
> @@ -286,6 +308,63 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
>          s->free_data_block_offset =
>              (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
>
> +        /* Read tdbatmap header by offset */
> +        ret = bdrv_pread(bs->file, s->free_data_block_offset,
> +            tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;
> +        if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {
> +            s->free_data_block_offset =
> +                be32_to_cpu(tdbatmap_header->batmap_size) * 512
> +                + be64_to_cpu(tdbatmap_header->batmap_offset);
> +        }
> +
> +        /* Read backing file location from dyn header table */
> +        if (dyndisk_header->parent_name[0] || dyndisk_header->parent_name[1]) {
> +            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {
> +                data_offset =
> +                    be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);
> +                data_length =
> +                    be32_to_cpu(dyndisk_header->parent_locator[i].data_length);
> +                platform = dyndisk_header->parent_locator[i].platform;
> +
> +                if (platform == PLATFORM_MACX) {

Aren't those checks required no matter what platform specific locator is used?

> +                    if (data_offset + PARENT_PREFIX_LEN >
> +                        s->max_table_entries * s->block_size) {
> +                        goto fail;
> +                    }
> +                    if (data_length - PARENT_PREFIX_LEN > PARENT_MAX_LOCATOR) {
> +                        goto fail;
> +                    }
> +                    ret = bdrv_pread(bs->file, data_offset + PARENT_PREFIX_LEN,
> +                        bs->backing_file, data_length - PARENT_PREFIX_LEN);
> +                    if (ret < 0) {
> +                        goto fail;
> +                    }
> +
> +                    bs->backing_file[data_length - PARENT_PREFIX_LEN] = '\0';
> +                }
> +
> +                if (data_offset > parent_locator_offset) {
> +                    parent_locator_offset = data_offset;
> +                }
> +            }
> +
> +            if (strlen(bs->backing_file) == 0) {
> +                error_setg(errp, "block-vpc: differencing is not support in"
> +                                 "w2ru or w2ku");

Why not? IMO, w2ru (relative paths) support MUST be added before merging this
feature.

> +                ret = -EINVAL;
> +                goto fail;
> +            }
> +        }
> +
> +        if (parent_locator_offset + 512 > s->free_data_block_offset) {
> +            s->free_data_block_offset = parent_locator_offset + 512;
> +        }
> +
>          for (i = 0; i < s->max_table_entries; i++) {
>              be32_to_cpus(&s->pagetable[i]);
>              if (s->pagetable[i] != 0xFFFFFFFF) {
> @@ -340,35 +419,76 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
>  }
>
>  /*
> - * Returns the absolute byte offset of the given sector in the image file.
> - * If the sector is not allocated, -1 is returned instead.
> + * Returns the absolute byte offset of the given sector in the differencing
> + * image file.
>   *
> - * The parameter write must be 1 if the offset will be used for a write
> - * operation (the block bitmaps is updated then), 0 otherwise.
> + * If error happened, -1 is returned.
> + *
> + * When write all type or read dynamic, if the sector is not allocated, -2
> + * is returned instead. If the sector is allocated in current file, the block
> + * offset is returned.
> + *
> + * When read diff. If the sector is not allocated, -2 is returned instead.
> + * If the sector is allocated in the backing file, -3 is returned. If the
> + * sector is allocated in current file, the block offset is returned.
>   */
>  static inline int64_t get_sector_offset(BlockDriverState *bs,
> -    int64_t sector_num, int write)
> +    int64_t sector_num, bool write, bool diff)
>  {
>      BDRVVPCState *s = bs->opaque;
> -    uint64_t offset = sector_num * 512;
> -    uint64_t bitmap_offset, block_offset;
> +    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
> +    uint64_t bitmap_offset;
>      uint32_t pagetable_index, pageentry_index;
> +    int64_t block_offset = LONG_MIN;
> +    int ret;
>
>      pagetable_index = offset / s->block_size;
> -    pageentry_index = (offset % s->block_size) / 512;
> +    pageentry_index = (offset % s->block_size) >> BDRV_SECTOR_BITS;
>
> -    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
> -        return -1; // not allocated
> +    if (pagetable_index >= s->max_table_entries) {
> +        return -2;
> +    }
> +    if (s->pagetable[pagetable_index] == 0xffffffff) {
> +        if (!write && diff) {
> +            return -3; /* parent allocated */
> +        } else {
> +            return -2; /* not allocated */
> +        }
> +    }
>
> -    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
> -    block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
> +    bitmap_offset = (uint64_t) s->pagetable[pagetable_index]
> +        << BDRV_SECTOR_BITS;
> +
> +    if (!diff || write) {
> +        block_offset = bitmap_offset + s->bitmap_size
> +            + (pageentry_index << BDRV_SECTOR_BITS);
> +    } else {
> +        uint32_t bitmap_index, bitmapentry_index;
> +        uint8_t bitmap[s->bitmap_size];
> +
> +        if (bitmap_offset > s->max_table_entries * s->block_size) {
> +            return -1;
> +        }
> +        ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
> +        if (ret < 0) {
> +            return -1;
> +        }
>
> +        bitmap_index = pageentry_index / 8;
> +        bitmapentry_index = 7 - pageentry_index % 8;
> +        if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {
> +            block_offset = bitmap_offset + s->bitmap_size
> +                + (pageentry_index << BDRV_SECTOR_BITS);
> +        } else {
> +            return -3;
> +        }
> +    }
>      // We must ensure that we don't write to any sectors which are marked as
>      // unused in the bitmap. We get away with setting all bits in the block
>      // bitmap each time we write to a new block. This might cause Virtual PC to
>      // miss sparse read optimization, but it's not a problem in terms of
>      // correctness.
> -    if (write && (s->last_bitmap_offset != bitmap_offset)) {
> +    if (!diff && write && (s->last_bitmap_offset != bitmap_offset)) {
>          uint8_t bitmap[s->bitmap_size];
>
>          s->last_bitmap_offset = bitmap_offset;
> @@ -376,7 +496,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
>          bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
>      }
>
> -//    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
> +//  printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
>  // sector_num, pagetable_index, pageentry_index,
>  // bitmap_offset, block_offset);
>
> @@ -437,7 +557,8 @@ static int rewrite_footer(BlockDriverState* bs)
>   *
>   * Returns the sectors' offset in the image file on success and < 0 on error
>   */
> -static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
> +static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,
> +    bool diff)
>  {
>      BDRVVPCState *s = bs->opaque;
>      int64_t bat_offset;
> @@ -457,7 +578,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
>      s->pagetable[index] = s->free_data_block_offset / 512;
>
>      // Initialize the block's bitmap
> -    memset(bitmap, 0xff, s->bitmap_size);
> +    if (diff) {
> +        memset(bitmap, 0x0, s->bitmap_size);
> +    } else {
> +        memset(bitmap, 0xff, s->bitmap_size);
> +    }
>      ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
>          s->bitmap_size);
>      if (ret < 0) {
> @@ -477,7 +602,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
>      if (ret < 0)
>          goto fail;
>
> -    return get_sector_offset(bs, sector_num, 0);
> +    return get_sector_offset(bs, sector_num, false, diff);
>
>  fail:
>      s->free_data_block_offset -= (s->block_size + s->bitmap_size);
> @@ -501,36 +626,66 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
>                      uint8_t *buf, int nb_sectors)
>  {
>      BDRVVPCState *s = bs->opaque;
> -    int ret;
> -    int64_t offset;
> -    int64_t sectors, sectors_per_block;
>      VHDFooter *footer = (VHDFooter *) s->footer_buf;
> +    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> +    int64_t offset, sectors;
> +    int ret;
>
> -    if (be32_to_cpu(footer->type) == VHD_FIXED) {
> +    switch (be32_to_cpu(footer->type)) {
> +    case VHD_FIXED:
>          return bdrv_read(bs->file, sector_num, buf, nb_sectors);
> -    }
> -    while (nb_sectors > 0) {
> -        offset = get_sector_offset(bs, sector_num, 0);
> -
> -        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> -        sectors = sectors_per_block - (sector_num % sectors_per_block);
> -        if (sectors > nb_sectors) {
> -            sectors = nb_sectors;
> -        }
> +    case VHD_DYNAMIC:
> +        while (nb_sectors > 0) {
> +            sectors = sectors_per_block - (sector_num % sectors_per_block);
> +            if (sectors > nb_sectors) {
> +                sectors = nb_sectors;
> +            }
>
> -        if (offset == -1) {
> -            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
> -        } else {
> -            ret = bdrv_pread(bs->file, offset, buf,
> -                sectors * BDRV_SECTOR_SIZE);
> -            if (ret != sectors * BDRV_SECTOR_SIZE) {
> +            offset = get_sector_offset(bs, sector_num, false, false);
> +            if (offset == -1) {
>                  return -1;
> +            } else if (offset == -2) {
> +                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
> +            } else {
> +                ret = bdrv_pread(bs->file, offset, buf,
> +                    sectors * BDRV_SECTOR_SIZE);
> +                if (ret != sectors * BDRV_SECTOR_SIZE) {
> +                    return -1;
> +                }
>              }
> +
> +            nb_sectors -= sectors;
> +            sector_num += sectors;
> +            buf += sectors * BDRV_SECTOR_SIZE;
>          }
> +        break;
> +    case VHD_DIFF:
> +        while (nb_sectors > 0) {
> +            offset = get_sector_offset(bs, sector_num, false, true);
> +            if (offset == -1) {
> +                return -1;
> +            } else if (offset == -2) {
> +                memset(buf, 0, BDRV_SECTOR_SIZE);
> +            } else if (offset == -3) {
> +                ret = bdrv_pread(bs->backing_hd, sector_num << BDRV_SECTOR_BITS
> +                    , buf, BDRV_SECTOR_SIZE);

What if we have a chain of differencing VHD images and the immediate parent
does not have the required sector either, which may be at an upper level?
Shouldn't this method be called recursively in this case?

> +                if (ret < 0) {
> +                    return -1;
> +                }
> +            } else {
> +                ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);
> +                if (ret != BDRV_SECTOR_SIZE) {
> +                    return -1;
> +                }
> +            }
>
> -        nb_sectors -= sectors;
> -        sector_num += sectors;
> -        buf += sectors * BDRV_SECTOR_SIZE;
> +            nb_sectors--;
> +            sector_num++;
> +            buf += BDRV_SECTOR_SIZE;
> +        }
> +        break;
> +    default:
> +        return -1;
>      }
>      return 0;
>  }
> @@ -546,44 +701,101 @@ static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
>      return ret;
>  }
>
> -static int vpc_write(BlockDriverState *bs, int64_t sector_num,
> -    const uint8_t *buf, int nb_sectors)
> +static inline int64_t write_bitmap(BlockDriverState *bs, int64_t sector_num,
> +    int64_t sectors)
>  {
>      BDRVVPCState *s = bs->opaque;
> -    int64_t offset;
> -    int64_t sectors, sectors_per_block;
> +    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
> +    uint64_t bitmap_offset;
> +    uint32_t pagetable_index, pageentry_index;
> +    uint8_t bitmap[s->bitmap_size];
> +    uint32_t bitmap_index, bitmapbit_index;
> +    int i;
>      int ret;
> -    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
>
> -    if (be32_to_cpu(footer->type) == VHD_FIXED) {
> -        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
> +    pagetable_index = offset / s->block_size;
> +    pageentry_index = (offset % s->block_size) / 512;
> +    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
> +
> +    if (bitmap_offset > s->max_table_entries * s->block_size) {
> +        return -1;
> +    }
> +    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
> +    if (ret < 0) {
> +        return -1;
>      }
> -    while (nb_sectors > 0) {
> -        offset = get_sector_offset(bs, sector_num, 1);
>
> -        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> -        sectors = sectors_per_block - (sector_num % sectors_per_block);
> -        if (sectors > nb_sectors) {
> -            sectors = nb_sectors;
> +    for (i = 0; i < sectors; i++) {
> +        bitmap_index = pageentry_index / 8;
> +        bitmapbit_index = 7 - pageentry_index % 8;
> +        bitmap[bitmap_index] |= (0x1 << bitmapbit_index);
> +        pageentry_index++;
> +    }
> +    ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
> +    if (ret < 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vpc_write(BlockDriverState *bs, int64_t sector_num,
> +    const uint8_t *buf, int nb_sectors)
> +{
> +    BDRVVPCState *s = bs->opaque;
> +    VHDFooter *footer = (VHDFooter *) s->footer_buf;
> +    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> +    int64_t offset, sectors;
> +    bool diff = true;
> +    int ret = 0;
> +
> +    switch (be32_to_cpu(footer->type)) {
> +    case VHD_FIXED:
> +        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
> +    case VHD_DYNAMIC:
> +    case VHD_DIFF:
> +        if (be32_to_cpu(footer->type) == VHD_DYNAMIC) {
> +            diff = false;
>          }
>
> -        if (offset == -1) {
> -            offset = alloc_block(bs, sector_num);
> -            if (offset < 0)
> +        while (nb_sectors > 0) {
> +            sectors = sectors_per_block - (sector_num % sectors_per_block);
> +            if (sectors > nb_sectors) {
> +                sectors = nb_sectors;
> +            }
> +
> +            offset = get_sector_offset(bs, sector_num, true, diff);
> +            if (offset == -1) {
>                  return -1;
> -        }
> +            } else if (offset == -2) {
> +                offset = alloc_block(bs, sector_num, diff);
> +                if (offset < 0) {
> +                    return -1;
> +                }
> +            }
>
> -        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
> -        if (ret != sectors * BDRV_SECTOR_SIZE) {
> -            return -1;
> -        }
> +            ret = bdrv_pwrite(bs->file, offset, buf,
> +                sectors * BDRV_SECTOR_SIZE);
> +            if (ret != sectors * BDRV_SECTOR_SIZE) {
> +                return -1;
> +            }
>
> -        nb_sectors -= sectors;
> -        sector_num += sectors;
> -        buf += sectors * BDRV_SECTOR_SIZE;
> -    }
> +            if (diff) {
> +                ret = write_bitmap(bs, sector_num, sectors);
> +                if (ret < 0) {
> +                    return -1;
> +                }
> +            }
>
> -    return 0;
> +            nb_sectors -= sectors;
> +            sector_num += sectors;
> +            buf += sectors * BDRV_SECTOR_SIZE;
> +        }
> +        break;
> +    default:
> +        return -1;
> +    }
> +    return ret;
>  }
>
>  static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
> @@ -910,6 +1122,7 @@ static BlockDriver bdrv_vpc = {
>      .bdrv_close             = vpc_close,
>      .bdrv_reopen_prepare    = vpc_reopen_prepare,
>      .bdrv_create            = vpc_create,
> +    .supports_backing       = true,
>
>      .bdrv_read              = vpc_co_read,
>      .bdrv_write             = vpc_co_write,
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 8a61215..aab3ae8 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -335,8 +335,10 @@ struct BlockDriverState {
>      QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
>
>      char filename[1024];
> -    char backing_file[1024]; /* if non zero, the image is a diff of
> -                                this file image */
> +
> +#define PARENT_MAX_LOCATOR 512
> +    char backing_file[PARENT_MAX_LOCATOR]; /* if non zero, the is a diff
> +                                of this file image */
>      char backing_format[16]; /* if non-zero and backing_file exists */
>
>      QDict *full_open_options;

 block/vpc.c               | 355 ++++++++++++++++++++++++++++++++++++----------
 include/block/block_int.h |   6 +-
 2 files changed, 288 insertions(+), 73 deletions(-)

diff --git a/block/vpc.c b/block/vpc.c
index c024b4c..2ff2bba 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -33,13 +33,18 @@
 /**************************************************************/

 #define HEADER_SIZE 512
+#define DYNAMIC_HEADER_SIZE 1024
+#define PARENT_LOCATOR_NUM 8
+#define PARENT_PREFIX_LEN 7 /* such as file:// */
+#define TBBATMAP_HEAD_SIZE 28
+#define PLATFORM_MACX 0x5863614d /* big endian */

 //#define CACHE

 enum vhd_type {
     VHD_FIXED           = 2,
     VHD_DYNAMIC         = 3,
-    VHD_DIFFERENCING    = 4,
+    VHD_DIFF            = 4,
 };

 // Seconds since Jan 1, 2000 0:00:00 (UTC)
@@ -138,6 +143,15 @@ typedef struct BDRVVPCState {
     Error *migration_blocker;
 } BDRVVPCState;

+typedef struct vhd_tdbatmap_header {
+    char magic[8]; /* always "tdbatmap" */
+
+    uint64_t batmap_offset;
+    uint32_t batmap_size;
+    uint32_t batmap_version;
+    uint32_t checksum;
+} QEMU_PACKED VHDTdBatmapHeader;
+
 static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 {
     uint32_t res = 0;
@@ -153,7 +167,7 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
-   return 100;
+        return 100;
     return 0;
 }

@@ -164,11 +178,17 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     int i;
     VHDFooter *footer;
     VHDDynDiskHeader *dyndisk_header;
-    uint8_t buf[HEADER_SIZE];
+    uint8_t buf[DYNAMIC_HEADER_SIZE];
+    uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];
     uint32_t checksum;
     uint64_t computed_size;
-    int disk_type = VHD_DYNAMIC;
+    uint32_t disk_type;
     int ret;
+    VHDTdBatmapHeader *tdbatmap_header;
+    int parent_locator_offset = 0;
+    int64_t data_offset = 0;
+    int data_length = 0;
+    uint32_t platform;

     ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
     if (ret < 0) {
@@ -176,6 +196,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     }

     footer = (VHDFooter *) s->footer_buf;
+    disk_type = be32_to_cpu(footer->type);
+
     if (strncmp(footer->creator, "conectix", 8)) {
         int64_t offset = bdrv_getlength(bs->file);
         if (offset < 0) {
@@ -230,9 +252,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }

-    if (disk_type == VHD_DYNAMIC) {
+    if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {
         ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
-                         HEADER_SIZE);
+                         DYNAMIC_HEADER_SIZE);
         if (ret < 0) {
             goto fail;
         }
@@ -286,6 +308,63 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
         s->free_data_block_offset =
             (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;

+        /* Read tdbatmap header by offset */
+        ret = bdrv_pread(bs->file, s->free_data_block_offset,
+            tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;
+        if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {
+            s->free_data_block_offset =
+                be32_to_cpu(tdbatmap_header->batmap_size) * 512
+                + be64_to_cpu(tdbatmap_header->batmap_offset);
+        }
+
+        /* Read backing file location from dyn header table */
+        if (dyndisk_header->parent_name[0] || dyndisk_header->parent_name[1]) {
+            for (i = 0; i < PARENT_LOCATOR_NUM; i++) {
+                data_offset =
+                    be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);
+                data_length =
+                    be32_to_cpu(dyndisk_header->parent_locator[i].data_length);
+                platform = dyndisk_header->parent_locator[i].platform;
+
+                if (platform == PLATFORM_MACX) {
+                    if (data_offset + PARENT_PREFIX_LEN >
+                        s->max_table_entries * s->block_size) {
+                        goto fail;
+                    }
+                    if (data_length - PARENT_PREFIX_LEN > PARENT_MAX_LOCATOR) {
+                        goto fail;
+                    }
+                    ret = bdrv_pread(bs->file, data_offset + PARENT_PREFIX_LEN,
+                        bs->backing_file, data_length - PARENT_PREFIX_LEN);
+                    if (ret < 0) {
+                        goto fail;
+                    }
+
+                    bs->backing_file[data_length - PARENT_PREFIX_LEN] = '\0';
+                }
+
+                if (data_offset > parent_locator_offset) {
+                    parent_locator_offset = data_offset;
+                }
+            }
+
+            if (strlen(bs->backing_file) == 0) {
+                error_setg(errp, "block-vpc: differencing is not support in"
+                                 "w2ru or w2ku");
+                ret = -EINVAL;
+                goto fail;
+            }
+        }
+
+        if (parent_locator_offset + 512 > s->free_data_block_offset) {
+            s->free_data_block_offset = parent_locator_offset + 512;
+        }
+
         for (i = 0; i < s->max_table_entries; i++) {
             be32_to_cpus(&s->pagetable[i]);
             if (s->pagetable[i] != 0xFFFFFFFF) {
@@ -340,35 +419,76 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
 }

 /*
- * Returns the absolute byte offset of the given sector in the image file.
- * If the sector is not allocated, -1 is returned instead.
+ * Returns the absolute byte offset of the given sector in the differencing
+ * image file.
  *
- * The parameter write must be 1 if the offset will be used for a write
- * operation (the block bitmaps is updated then), 0 otherwise.
+ * If error happened, -1 is returned.
+ *
+ * When write all type or read dynamic, if the sector is not allocated, -2
+ * is returned instead. If the sector is allocated in current file, the block
+ * offset is returned.
+ *
+ * When read diff. If the sector is not allocated, -2 is returned instead.
+ * If the sector is allocated in the backing file, -3 is returned. If the
+ * sector is allocated in current file, the block offset is returned.
  */
 static inline int64_t get_sector_offset(BlockDriverState *bs,
-    int64_t sector_num, int write)
+    int64_t sector_num, bool write, bool diff)
 {
     BDRVVPCState *s = bs->opaque;
-    uint64_t offset = sector_num * 512;
-    uint64_t bitmap_offset, block_offset;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
     uint32_t pagetable_index, pageentry_index;
+    int64_t block_offset = LONG_MIN;
+    int ret;

     pagetable_index = offset / s->block_size;
-    pageentry_index = (offset % s->block_size) / 512;
+    pageentry_index = (offset % s->block_size) >> BDRV_SECTOR_BITS;

-    if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
-        return -1; // not allocated
+    if (pagetable_index >= s->max_table_entries) {
+        return -2;
+    }
+    if (s->pagetable[pagetable_index] == 0xffffffff) {
+        if (!write && diff) {
+            return -3; /* parent allocated */
+        } else {
+            return -2; /* not allocated */
+        }
+    }

-    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
-    block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+    bitmap_offset = (uint64_t) s->pagetable[pagetable_index]
+        << BDRV_SECTOR_BITS;
+
+    if (!diff || write) {
+        block_offset = bitmap_offset + s->bitmap_size
+            + (pageentry_index << BDRV_SECTOR_BITS);
+    } else {
+        uint32_t bitmap_index, bitmapentry_index;
+        uint8_t bitmap[s->bitmap_size];
+
+        if (bitmap_offset > s->max_table_entries * s->block_size) {
+            return -1;
+        }
+        ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+        if (ret < 0) {
+            return -1;
+        }

+        bitmap_index = pageentry_index / 8;
+        bitmapentry_index = 7 - pageentry_index % 8;
+        if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {
+            block_offset = bitmap_offset + s->bitmap_size
+                + (pageentry_index << BDRV_SECTOR_BITS);
+        } else {
+            return -3;
+        }
+    }
     // We must ensure that we don't write to any sectors which are marked as
     // unused in the bitmap. We get away with setting all bits in the block
     // bitmap each time we write to a new block. This might cause Virtual PC to
     // miss sparse read optimization, but it's not a problem in terms of
     // correctness.
-    if (write && (s->last_bitmap_offset != bitmap_offset)) {
+    if (!diff && write && (s->last_bitmap_offset != bitmap_offset)) {
         uint8_t bitmap[s->bitmap_size];

         s->last_bitmap_offset = bitmap_offset;
@@ -376,7 +496,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
         bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
     }

-//    printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
+//  printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
 // sector_num, pagetable_index, pageentry_index,
 // bitmap_offset, block_offset);

@@ -437,7 +557,8 @@ static int rewrite_footer(BlockDriverState* bs)
  *
  * Returns the sectors' offset in the image file on success and < 0 on error
  */
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,
+    bool diff)
 {
     BDRVVPCState *s = bs->opaque;
     int64_t bat_offset;
@@ -457,7 +578,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
     s->pagetable[index] = s->free_data_block_offset / 512;

     // Initialize the block's bitmap
-    memset(bitmap, 0xff, s->bitmap_size);
+    if (diff) {
+        memset(bitmap, 0x0, s->bitmap_size);
+    } else {
+        memset(bitmap, 0xff, s->bitmap_size);
+    }
     ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
         s->bitmap_size);
     if (ret < 0) {
@@ -477,7 +602,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
     if (ret < 0)
         goto fail;

-    return get_sector_offset(bs, sector_num, 0);
+    return get_sector_offset(bs, sector_num, false, diff);

 fail:
     s->free_data_block_offset -= (s->block_size + s->bitmap_size);
@@ -501,36 +626,66 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
                     uint8_t *buf, int nb_sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int ret;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
     VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    int ret;

-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
         return bdrv_read(bs->file, sector_num, buf, nb_sectors);
-    }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 0);
-
-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
-        }
+    case VHD_DYNAMIC:
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }

-        if (offset == -1) {
-            memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
-        } else {
-            ret = bdrv_pread(bs->file, offset, buf,
-                sectors * BDRV_SECTOR_SIZE);
-            if (ret != sectors * BDRV_SECTOR_SIZE) {
+            offset = get_sector_offset(bs, sector_num, false, false);
+            if (offset == -1) {
                 return -1;
+            } else if (offset == -2) {
+                memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf,
+                    sectors * BDRV_SECTOR_SIZE);
+                if (ret != sectors * BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
             }
+
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
         }
+        break;
+    case VHD_DIFF:
+        while (nb_sectors > 0) {
+            offset = get_sector_offset(bs, sector_num, false, true);
+            if (offset == -1) {
+                return -1;
+            } else if (offset == -2) {
+                memset(buf, 0, BDRV_SECTOR_SIZE);
+            } else if (offset == -3) {
+                ret = bdrv_pread(bs->backing_hd, sector_num << BDRV_SECTOR_BITS
+                    , buf, BDRV_SECTOR_SIZE);
+                if (ret < 0) {
+                    return -1;
+                }
+            } else {
+                ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);
+                if (ret != BDRV_SECTOR_SIZE) {
+                    return -1;
+                }
+            }

-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
+            nb_sectors--;
+            sector_num++;
+            buf += BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
     }
     return 0;
 }
@@ -546,44 +701,101 @@ static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }

-static int vpc_write(BlockDriverState *bs, int64_t sector_num,
-    const uint8_t *buf, int nb_sectors)
+static inline int64_t write_bitmap(BlockDriverState *bs, int64_t sector_num,
+    int64_t sectors)
 {
     BDRVVPCState *s = bs->opaque;
-    int64_t offset;
-    int64_t sectors, sectors_per_block;
+    uint64_t offset = sector_num << BDRV_SECTOR_BITS;
+    uint64_t bitmap_offset;
+    uint32_t pagetable_index, pageentry_index;
+    uint8_t bitmap[s->bitmap_size];
+    uint32_t bitmap_index, bitmapbit_index;
+    int i;
     int ret;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;

-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+    pagetable_index = offset / s->block_size;
+    pageentry_index = (offset % s->block_size) / 512;
+    bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
+
+    if (bitmap_offset > s->max_table_entries * s->block_size) {
+        return -1;
+    }
+    ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
     }
-    while (nb_sectors > 0) {
-        offset = get_sector_offset(bs, sector_num, 1);

-        sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
-        sectors = sectors_per_block - (sector_num % sectors_per_block);
-        if (sectors > nb_sectors) {
-            sectors = nb_sectors;
+    for (i = 0; i < sectors; i++) {
+        bitmap_index = pageentry_index / 8;
+        bitmapbit_index = 7 - pageentry_index % 8;
+        bitmap[bitmap_index] |= (0x1 << bitmapbit_index);
+        pageentry_index++;
+    }
+    ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+    if (ret < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static int vpc_write(BlockDriverState *bs, int64_t sector_num,
+    const uint8_t *buf, int nb_sectors)
+{
+    BDRVVPCState *s = bs->opaque;
+    VHDFooter *footer = (VHDFooter *) s->footer_buf;
+    int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+    int64_t offset, sectors;
+    bool diff = true;
+    int ret = 0;
+
+    switch (be32_to_cpu(footer->type)) {
+    case VHD_FIXED:
+        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+    case VHD_DYNAMIC:
+    case VHD_DIFF:
+        if (be32_to_cpu(footer->type) == VHD_DYNAMIC) {
+            diff = false;
         }

-        if (offset == -1) {
-            offset = alloc_block(bs, sector_num);
-            if (offset < 0)
+        while (nb_sectors > 0) {
+            sectors = sectors_per_block - (sector_num % sectors_per_block);
+            if (sectors > nb_sectors) {
+                sectors = nb_sectors;
+            }
+
+            offset = get_sector_offset(bs, sector_num, true, diff);
+            if (offset == -1) {
                 return -1;
-        }
+            } else if (offset == -2) {
+                offset = alloc_block(bs, sector_num, diff);
+                if (offset < 0) {
+                    return -1;
+                }
+            }

-        ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
-        if (ret != sectors * BDRV_SECTOR_SIZE) {
-            return -1;
-        }
+            ret = bdrv_pwrite(bs->file, offset, buf,
+                sectors * BDRV_SECTOR_SIZE);
+            if (ret != sectors * BDRV_SECTOR_SIZE) {
+                return -1;
+            }

-        nb_sectors -= sectors;
-        sector_num += sectors;
-        buf += sectors * BDRV_SECTOR_SIZE;
-    }
+            if (diff) {
+                ret = write_bitmap(bs, sector_num, sectors);
+                if (ret < 0) {
+                    return -1;
+                }
+            }

-    return 0;
+            nb_sectors -= sectors;
+            sector_num += sectors;
+            buf += sectors * BDRV_SECTOR_SIZE;
+        }
+        break;
+    default:
+        return -1;
+    }
+    return ret;
 }

 static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
@@ -910,6 +1122,7 @@ static BlockDriver bdrv_vpc = {
     .bdrv_close             = vpc_close,
     .bdrv_reopen_prepare    = vpc_reopen_prepare,
     .bdrv_create            = vpc_create,
+    .supports_backing       = true,

     .bdrv_read              = vpc_co_read,
     .bdrv_write             = vpc_co_write,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8a61215..aab3ae8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -335,8 +335,10 @@ struct BlockDriverState {
     QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;

     char filename[1024];
-    char backing_file[1024]; /* if non zero, the image is a diff of
-                                this file image */
+
+#define PARENT_MAX_LOCATOR 512
+    char backing_file[PARENT_MAX_LOCATOR]; /* if non zero, the image is a
+                                diff of this file image */
     char backing_format[16]; /* if non-zero and backing_file exists */

     QDict *full_open_options;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]