|
From: | Xiaodong Gong |
Subject: | Re: [Qemu-devel] [PATCH v5] Support vhd type VHD_DIFFERENCING |
Date: | Wed, 8 Oct 2014 20:53:07 +0800 |
On Fri, Sep 26, 2014 at 09:43:18PM +0800, Xiaodong Gong wrote:
> Currently qemu only supports the VHD types VHD_FIXED and VHD_DYNAMIC, so
> it cannot read VHD snapshot volumes and cannot support the other storage
> features of VHD files.
>
> This patch adds reading of the parent information in "vpc_open", reading
> of the bitmap in "vpc_read", and updating of the bitmap in "vpc_write".
>
> Signed-off-by: Xiaodong Gong <address@hidden>
> ---
> block/vpc.c | 428 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 357 insertions(+), 71 deletions(-)
Waiting for code review.
I only consider patches for the block branch that have at least 1
Reviewed-by from another contributor.
Anyone?
> diff --git a/block/vpc.c b/block/vpc.c
> index 4947369..1210542 100644
> --- a/block/vpc.c
> +++ b/block/vpc.c
> @@ -29,17 +29,27 @@
> #if defined(CONFIG_UUID)
> #include <uuid/uuid.h>
> #endif
> +#include <iconv.h>
>
> /**************************************************************/
>
> #define HEADER_SIZE 512
> +#define DYNAMIC_HEADER_SIZE 1024
> +#define PARENT_LOCATOR_NUM 8
> +#define MACX_PREFIX_LEN 7 /* file:// */
> +#define TBBATMAP_HEAD_SIZE 28
> +
> +#define PLATFORM_MACX 0x5863614d /* big endian */
> +#define PLATFORM_W2RU 0x75723257
> +
> +#define VHD_VERSION(major, minor) (((major) << 16) | ((minor) & 0x0000FFFF))
>
> //#define CACHE
>
> enum vhd_type {
> VHD_FIXED = 2,
> VHD_DYNAMIC = 3,
> - VHD_DIFFERENCING = 4,
> + VHD_DIFF = 4,
> };
>
> // Seconds since Jan 1, 2000 0:00:00 (UTC)
> @@ -138,6 +148,15 @@ typedef struct BDRVVPCState {
> Error *migration_blocker;
> } BDRVVPCState;
>
> +typedef struct vhd_tdbatmap_header {
> + char magic[8]; /* always "tdbatmap" */
> +
> + uint64_t batmap_offset;
> + uint32_t batmap_size;
> + uint32_t batmap_version;
> + uint32_t checksum;
> +} QEMU_PACKED VHDTdBatmapHeader;
> +
> static uint32_t vpc_checksum(uint8_t* buf, size_t size)
> {
> uint32_t res = 0;
> @@ -153,10 +172,107 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
> static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
> {
> if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
> - return 100;
> + return 100;
> return 0;
> }
>
> +static int vpc_read_backing_loc(VHDDynDiskHeader *dyndisk_header,
> + BlockDriverState *bs,
> + Error **errp)
> +{
> + BDRVVPCState *s = bs->opaque;
> + int64_t data_offset = 0;
> + int data_length = 0;
> + uint32_t platform;
> + bool done = false;
> + int parent_locator_offset = 0;
> + int i;
> + int ret = 0;
> +
> + for (i = 0; i < PARENT_LOCATOR_NUM; i++) {
> + data_offset =
> + be64_to_cpu(dyndisk_header->parent_locator[i].data_offset);
> + data_length =
> + be32_to_cpu(dyndisk_header->parent_locator[i].data_length);
> + platform = dyndisk_header->parent_locator[i].platform;
> +
> + /* Extend the location offset */
> + if (parent_locator_offset < data_offset) {
> + parent_locator_offset = data_offset;
> + }
> +
> + if (done) {
> + continue;
> + }
> +
> + /* Skip "file://" in MacX platform */
> + if (platform == PLATFORM_MACX) {
> + data_offset += MACX_PREFIX_LEN;
> + data_length -= MACX_PREFIX_LEN;
> + }
> +
> + /* Read location of backing file */
> + if (platform == PLATFORM_MACX || platform == PLATFORM_W2RU) {
> + if (data_offset > s->max_table_entries * s->block_size) {
> + return -1;
> + }
> + if (data_length > BDRV_SECTOR_SIZE) {
> + return -1;
> + }
> + ret = bdrv_pread(bs->file, data_offset, bs->backing_file,
> + data_length);
> + if (ret < 0) {
> + return ret;
> + }
> + bs->backing_file[data_length] = '\0';
> + }
> +
> + /* Convert location to ASCII string */
> + if (platform == PLATFORM_MACX) {
> + done = true;
> +
> + } else if (platform == PLATFORM_W2RU) {
> + /* Location is UTF-16LE; convert it to ASCII */
> + char *out, *optr;
> + int j;
> +
> + optr = out = (char *) malloc(data_length + 1);
> + if (out == NULL) {
> + ret = -1;
> + return ret;
> + }
> + memset(out, 0, data_length + 1);
> +
> + for (j = 0; j < data_length + 1; j++) {
> + out[j] = bs->backing_file[2*j];
> + }
> + out[data_length + 1] = '\0';
> +
> + while (*optr != '\0') {
> + if (*optr == '\\') {
> + *optr = '/';
> + }
> + optr++;
> + }
> +
> + strncpy(bs->backing_file, out, data_length + 1);
> +
> + out = NULL;
> + free(out);
> +
> + done = true;
> + }
> + }
> +
> + if (bs->backing_file[0] == '\0') {
> + error_setg(errp, "block-vpc: differencing is not support in w2ku");
> + ret = -EINVAL;
> + return ret;
> + }
> +
> + return parent_locator_offset;
> +}
> +
> static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
> Error **errp)
> {
> @@ -164,11 +280,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
> int i;
> VHDFooter *footer;
> VHDDynDiskHeader *dyndisk_header;
> - uint8_t buf[HEADER_SIZE];
> + uint8_t buf[DYNAMIC_HEADER_SIZE];
> + uint8_t tdbatmap_header_buf[TBBATMAP_HEAD_SIZE];
> uint32_t checksum;
> uint64_t computed_size;
> - int disk_type = VHD_DYNAMIC;
> + uint32_t disk_type;
> int ret;
> + VHDTdBatmapHeader *tdbatmap_header;
> + int parent_locator_offset = 0;
>
> ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
> if (ret < 0) {
> @@ -176,6 +295,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
> }
>
> footer = (VHDFooter *) s->footer_buf;
> + disk_type = be32_to_cpu(footer->type);
> +
> if (strncmp(footer->creator, "conectix", 8)) {
> int64_t offset = bdrv_getlength(bs->file);
> if (offset < 0) {
> @@ -230,9 +351,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
> goto fail;
> }
>
> - if (disk_type == VHD_DYNAMIC) {
> + if (disk_type == VHD_DYNAMIC || disk_type == VHD_DIFF) {
> ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
> - HEADER_SIZE);
> + DYNAMIC_HEADER_SIZE);
> if (ret < 0) {
> goto fail;
> }
> @@ -286,6 +407,37 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
> s->free_data_block_offset =
> (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
>
> + /* Read tdbatmap header by offset */
> + if (footer->version >= VHD_VERSION(1, 2)) {
> + ret = bdrv_pread(bs->file, s->free_data_block_offset,
> + tdbatmap_header_buf, TBBATMAP_HEAD_SIZE);
> + if (ret < 0) {
> + goto fail;
> + }
> +
> + tdbatmap_header = (VHDTdBatmapHeader *) tdbatmap_header_buf;
> + if (!strncmp(tdbatmap_header->magic, "tdbatmap", 8)) {
> + s->free_data_block_offset =
> + be32_to_cpu(tdbatmap_header->batmap_size) * 512
> + + be64_to_cpu(tdbatmap_header->batmap_offset);
> + }
> + }
> +
> + /* Read backing file location from dyn header table */
> + if (dyndisk_header->parent_name[0] || dyndisk_header->parent_name[1]) {
> + ret = parent_locator_offset = vpc_read_backing_loc(dyndisk_header,
> + bs, errp);
> + if (ret < 0) {
> + goto fail;
> + }
> + }
> +
> + if (s->free_data_block_offset < parent_locator_offset
> + + BDRV_SECTOR_SIZE) {
> + s->free_data_block_offset = parent_locator_offset
> + + BDRV_SECTOR_SIZE;
> + }
> +
> for (i = 0; i < s->max_table_entries; i++) {
> be32_to_cpus(&s->pagetable[i]);
> if (s->pagetable[i] != 0xFFFFFFFF) {
> @@ -340,35 +492,76 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
> }
>
> /*
> - * Returns the absolute byte offset of the given sector in the image file.
> - * If the sector is not allocated, -1 is returned instead.
> + * Returns the absolute byte offset of the given sector in the differencing
> + * image file.
> + *
> + * If an error occurs, -1 is returned.
> + *
> + * For writes of any type and for reads of dynamic images: if the sector is
> + * not allocated, -2 is returned. If the sector is allocated in the current
> + * file, the block offset is returned.
> *
> - * The parameter write must be 1 if the offset will be used for a write
> - * operation (the block bitmaps is updated then), 0 otherwise.
> + * For reads of differencing images: if the sector is not allocated, -2 is
> + * returned. If the sector is allocated in the backing file, -3 is returned.
> + * If the sector is allocated in the current file, its offset is returned.
> */
> static inline int64_t get_sector_offset(BlockDriverState *bs,
> - int64_t sector_num, int write)
> + int64_t sector_num, bool write, bool diff)
> {
> BDRVVPCState *s = bs->opaque;
> - uint64_t offset = sector_num * 512;
> - uint64_t bitmap_offset, block_offset;
> + uint64_t offset = sector_num << BDRV_SECTOR_BITS;
> + uint64_t bitmap_offset;
> uint32_t pagetable_index, pageentry_index;
> + int64_t block_offset = LONG_MIN;
> + int ret;
>
> pagetable_index = offset / s->block_size;
> - pageentry_index = (offset % s->block_size) / 512;
> + pageentry_index = (offset % s->block_size) >> BDRV_SECTOR_BITS;
>
> - if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
> - return -1; // not allocated
> + if (pagetable_index >= s->max_table_entries) {
> + return -2;
> + }
> + if (s->pagetable[pagetable_index] == 0xffffffff) {
> + if (!write && diff) {
> + return -3; /* parent allocated */
> + } else {
> + return -2; /* not allocated */
> + }
> + }
>
> - bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
> - block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
> + bitmap_offset = (uint64_t) s->pagetable[pagetable_index]
> + << BDRV_SECTOR_BITS;
> +
> + if (!diff || write) {
> + block_offset = bitmap_offset + s->bitmap_size
> + + (pageentry_index << BDRV_SECTOR_BITS);
> + } else {
> + uint32_t bitmap_index, bitmapentry_index;
> + uint8_t bitmap[s->bitmap_size];
>
> + if (bitmap_offset > s->max_table_entries * s->block_size) {
> + return -1;
> + }
> + ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
> + if (ret < 0) {
> + return -1;
> + }
> +
> + bitmap_index = pageentry_index / 8;
> + bitmapentry_index = 7 - pageentry_index % 8;
> + if (bitmap[bitmap_index] & 0x1 << bitmapentry_index) {
> + block_offset = bitmap_offset + s->bitmap_size
> + + (pageentry_index << BDRV_SECTOR_BITS);
> + } else {
> + return -3;
> + }
> + }
> // We must ensure that we don't write to any sectors which are marked as
> // unused in the bitmap. We get away with setting all bits in the block
> // bitmap each time we write to a new block. This might cause Virtual PC to
> // miss sparse read optimization, but it's not a problem in terms of
> // correctness.
> - if (write && (s->last_bitmap_offset != bitmap_offset)) {
> + if (!diff && write && (s->last_bitmap_offset != bitmap_offset)) {
> uint8_t bitmap[s->bitmap_size];
>
> s->last_bitmap_offset = bitmap_offset;
> @@ -376,7 +569,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
> bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
> }
>
> -// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
> +// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
> // sector_num, pagetable_index, pageentry_index,
> // bitmap_offset, block_offset);
>
> @@ -437,7 +630,8 @@ static int rewrite_footer(BlockDriverState* bs)
> *
> * Returns the sectors' offset in the image file on success and < 0 on error
> */
> -static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
> +static int64_t alloc_block(BlockDriverState *bs, int64_t sector_num,
> + bool diff)
> {
> BDRVVPCState *s = bs->opaque;
> int64_t bat_offset;
> @@ -457,7 +651,11 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
> s->pagetable[index] = s->free_data_block_offset / 512;
>
> // Initialize the block's bitmap
> - memset(bitmap, 0xff, s->bitmap_size);
> + if (diff) {
> + memset(bitmap, 0x0, s->bitmap_size);
> + } else {
> + memset(bitmap, 0xff, s->bitmap_size);
> + }
> ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
> s->bitmap_size);
> if (ret < 0) {
> @@ -477,7 +675,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
> if (ret < 0)
> goto fail;
>
> - return get_sector_offset(bs, sector_num, 0);
> + return get_sector_offset(bs, sector_num, false, diff);
>
> fail:
> s->free_data_block_offset -= (s->block_size + s->bitmap_size);
> @@ -501,36 +699,66 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
> uint8_t *buf, int nb_sectors)
> {
> BDRVVPCState *s = bs->opaque;
> - int ret;
> - int64_t offset;
> - int64_t sectors, sectors_per_block;
> VHDFooter *footer = (VHDFooter *) s->footer_buf;
> + int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> + int64_t offset, sectors;
> + int ret;
>
> - if (be32_to_cpu(footer->type) == VHD_FIXED) {
> + switch (be32_to_cpu(footer->type)) {
> + case VHD_FIXED:
> return bdrv_read(bs->file, sector_num, buf, nb_sectors);
> - }
> - while (nb_sectors > 0) {
> - offset = get_sector_offset(bs, sector_num, 0);
> -
> - sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> - sectors = sectors_per_block - (sector_num % sectors_per_block);
> - if (sectors > nb_sectors) {
> - sectors = nb_sectors;
> - }
> + case VHD_DYNAMIC:
> + while (nb_sectors > 0) {
> + sectors = sectors_per_block - (sector_num % sectors_per_block);
> + if (sectors > nb_sectors) {
> + sectors = nb_sectors;
> + }
>
> - if (offset == -1) {
> - memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
> - } else {
> - ret = bdrv_pread(bs->file, offset, buf,
> - sectors * BDRV_SECTOR_SIZE);
> - if (ret != sectors * BDRV_SECTOR_SIZE) {
> + offset = get_sector_offset(bs, sector_num, false, false);
> + if (offset == -1) {
> return -1;
> + } else if (offset == -2) {
> + memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
> + } else {
> + ret = bdrv_pread(bs->file, offset, buf,
> + sectors * BDRV_SECTOR_SIZE);
> + if (ret != sectors * BDRV_SECTOR_SIZE) {
> + return -1;
> + }
> }
> +
> + nb_sectors -= sectors;
> + sector_num += sectors;
> + buf += sectors * BDRV_SECTOR_SIZE;
> }
> + break;
> + case VHD_DIFF:
> + while (nb_sectors > 0) {
> + offset = get_sector_offset(bs, sector_num, false, true);
> + if (offset == -1) {
> + return -1;
> + } else if (offset == -2) {
> + memset(buf, 0, BDRV_SECTOR_SIZE);
> + } else if (offset == -3) {
> + ret = bdrv_pread(bs->backing_hd, sector_num << BDRV_SECTOR_BITS
> + , buf, BDRV_SECTOR_SIZE);
> + if (ret < 0) {
> + return -1;
> + }
> + } else {
> + ret = bdrv_pread(bs->file, offset, buf, BDRV_SECTOR_SIZE);
> + if (ret != BDRV_SECTOR_SIZE) {
> + return -1;
> + }
> + }
>
> - nb_sectors -= sectors;
> - sector_num += sectors;
> - buf += sectors * BDRV_SECTOR_SIZE;
> + nb_sectors--;
> + sector_num++;
> + buf += BDRV_SECTOR_SIZE;
> + }
> + break;
> + default:
> + return -1;
> }
> return 0;
> }
> @@ -546,44 +774,101 @@ static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
> return ret;
> }
>
> -static int vpc_write(BlockDriverState *bs, int64_t sector_num,
> - const uint8_t *buf, int nb_sectors)
> +static inline int64_t write_bitmap(BlockDriverState *bs, int64_t sector_num,
> + int64_t sectors)
> {
> BDRVVPCState *s = bs->opaque;
> - int64_t offset;
> - int64_t sectors, sectors_per_block;
> + uint64_t offset = sector_num << BDRV_SECTOR_BITS;
> + uint64_t bitmap_offset;
> + uint32_t pagetable_index, pageentry_index;
> + uint8_t bitmap[s->bitmap_size];
> + uint32_t bitmap_index, bitmapbit_index;
> + int i;
> int ret;
> - VHDFooter *footer = (VHDFooter *) s->footer_buf;
>
> - if (be32_to_cpu(footer->type) == VHD_FIXED) {
> - return bdrv_write(bs->file, sector_num, buf, nb_sectors);
> + pagetable_index = offset / s->block_size;
> + pageentry_index = (offset % s->block_size) / 512;
> + bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
> +
> + if (bitmap_offset > s->max_table_entries * s->block_size) {
> + return -1;
> + }
> + ret = bdrv_pread(bs->file, bitmap_offset, bitmap, s->bitmap_size);
> + if (ret < 0) {
> + return -1;
> }
> - while (nb_sectors > 0) {
> - offset = get_sector_offset(bs, sector_num, 1);
>
> - sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> - sectors = sectors_per_block - (sector_num % sectors_per_block);
> - if (sectors > nb_sectors) {
> - sectors = nb_sectors;
> + for (i = 0; i < sectors; i++) {
> + bitmap_index = pageentry_index / 8;
> + bitmapbit_index = 7 - pageentry_index % 8;
> + bitmap[bitmap_index] |= (0x1 << bitmapbit_index);
> + pageentry_index++;
> + }
> + ret = bdrv_pwrite(bs->file, bitmap_offset, bitmap, s->bitmap_size);
> + if (ret < 0) {
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int vpc_write(BlockDriverState *bs, int64_t sector_num,
> + const uint8_t *buf, int nb_sectors)
> +{
> + BDRVVPCState *s = bs->opaque;
> + VHDFooter *footer = (VHDFooter *) s->footer_buf;
> + int64_t sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
> + int64_t offset, sectors;
> + bool diff = true;
> + int ret = 0;
> +
> + switch (be32_to_cpu(footer->type)) {
> + case VHD_FIXED:
> + return bdrv_write(bs->file, sector_num, buf, nb_sectors);
> + case VHD_DYNAMIC:
> + case VHD_DIFF:
> + if (be32_to_cpu(footer->type) == VHD_DYNAMIC) {
> + diff = false;
> }
>
> - if (offset == -1) {
> - offset = alloc_block(bs, sector_num);
> - if (offset < 0)
> + while (nb_sectors > 0) {
> + sectors = sectors_per_block - (sector_num % sectors_per_block);
> + if (sectors > nb_sectors) {
> + sectors = nb_sectors;
> + }
> +
> + offset = get_sector_offset(bs, sector_num, true, diff);
> + if (offset == -1) {
> return -1;
> - }
> + } else if (offset == -2) {
> + offset = alloc_block(bs, sector_num, diff);
> + if (offset < 0) {
> + return -1;
> + }
> + }
>
> - ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
> - if (ret != sectors * BDRV_SECTOR_SIZE) {
> - return -1;
> - }
> + ret = bdrv_pwrite(bs->file, offset, buf,
> + sectors * BDRV_SECTOR_SIZE);
> + if (ret != sectors * BDRV_SECTOR_SIZE) {
> + return -1;
> + }
>
> - nb_sectors -= sectors;
> - sector_num += sectors;
> - buf += sectors * BDRV_SECTOR_SIZE;
> - }
> + if (diff) {
> + ret = write_bitmap(bs, sector_num, sectors);
> + if (ret < 0) {
> + return -1;
> + }
> + }
>
> - return 0;
> + nb_sectors -= sectors;
> + sector_num += sectors;
> + buf += sectors * BDRV_SECTOR_SIZE;
> + }
> + break;
> + default:
> + return -1;
> + }
> + return ret;
> }
>
> static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
> @@ -911,6 +1196,7 @@ static BlockDriver bdrv_vpc = {
> .bdrv_close = vpc_close,
> .bdrv_reopen_prepare = vpc_reopen_prepare,
> .bdrv_create = vpc_create,
> + .supports_backing = true,
>
> .bdrv_read = vpc_co_read,
> .bdrv_write = vpc_co_write,
> --
> 1.8.3.1
>
>
[Prev in Thread] | Current Thread | [Next in Thread] |