qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support frame


From: Kevin Wolf
Subject: Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support framework - supports open and probe
Date: Tue, 23 Apr 2013 17:46:28 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

Am 23.04.2013 um 16:24 hat Jeff Cody geschrieben:
> This is the initial block driver framework for VHDX image support (
> i.e. Hyper-V image file formats), that supports opening VHDX files, and
> parsing the headers.
> 
> This commit does not yet enable:
>     - reading
>     - writing
>     - updating the header
>     - differencing files (images with parents)
>     - log replay / dirty logs (only clean images)
> 
> This is based on Microsoft's VHDX specification:
>     "VHDX Format Specification v0.95", published 4/12/2012
>     https://www.microsoft.com/en-us/download/details.aspx?id=29681
> 
> Signed-off-by: Jeff Cody <address@hidden>

Okay, I need more time actually reading the spec and comparing this code
to it before I can really say anything about the logic (I hope I'll get
to it tomorrow), but for a start, let me point out the obvious things.

> ---
>  block/Makefile.objs |   1 +
>  block/vhdx.c        | 769 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/vhdx.h        |  25 ++
>  3 files changed, 795 insertions(+)
>  create mode 100644 block/vhdx.c
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 6c4b5bc..5f0358a 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o 
> bochs.o vpc.o vvfat
>  block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
> qcow2-cache.o
>  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>  block-obj-y += qed-check.o
> +block-obj-y += vhdx.o
>  block-obj-y += parallels.o blkdebug.o blkverify.o
>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>  block-obj-$(CONFIG_POSIX) += raw-posix.o
> diff --git a/block/vhdx.c b/block/vhdx.c
> new file mode 100644
> index 0000000..b0ea2ba
> --- /dev/null
> +++ b/block/vhdx.c
> @@ -0,0 +1,769 @@
> +/*
> + * Block driver for Hyper-V VHDX Images
> + *
> + * Copyright (c) 2013 Red Hat, Inc.,
> + *
> + * Authors:
> + *  Jeff Cody <address@hidden>
> + *
> + *  This is based on the "VHDX Format Specification v0.95", published 
> 4/12/2012
> + *  by Microsoft:
> + *      https://www.microsoft.com/en-us/download/details.aspx?id=29681
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
> + * See the COPYING.LIB file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include "block/block_int.h"
> +#include "qemu/module.h"
> +#include "qemu/crc32c.h"
> +#include "block/vhdx.h"
> +
> +
> +/* Several metadata and region table data entries are identified by
> + * guids in  a MS-specific GUID format. */
> +
> +
> +/* ------- Known Region Table GUIDs ---------------------- */
> +static const ms_guid bat_guid =      { .data1 = 0x2dc27766,
> +                                       .data2 = 0xf623,
> +                                       .data3 = 0x4200,
> +                                       .data4 = { 0x9d, 0x64, 0x11, 0x5e,
> +                                                  0x9b, 0xfd, 0x4a, 0x08} };
> +
> +static const ms_guid metadata_guid = { .data1 = 0x8b7ca206,
> +                                       .data2 = 0x4790,
> +                                       .data3 = 0x4b9a,
> +                                       .data4 = { 0xb8, 0xfe, 0x57, 0x5f,
> +                                                  0x05, 0x0f, 0x88, 0x6e} };
> +
> +
> +
> +/* ------- Known Metadata Entry GUIDs ---------------------- */
> +static const ms_guid file_param_guid =   { .data1 = 0xcaa16737,
> +                                           .data2 = 0xfa36,
> +                                           .data3 = 0x4d43,
> +                                           .data4 = { 0xb3, 0xb6, 0x33, 0xf0,
> +                                                      0xaa, 0x44, 0xe7, 
> 0x6b} };
> +
> +static const ms_guid virtual_size_guid = { .data1 = 0x2FA54224,
> +                                           .data2 = 0xcd1b,
> +                                           .data3 = 0x4876,
> +                                           .data4 = { 0xb2, 0x11, 0x5d, 0xbe,
> +                                                      0xd8, 0x3b, 0xf4, 
> 0xb8} };
> +
> +static const ms_guid page83_guid =       { .data1 = 0xbeca12ab,
> +                                           .data2 = 0xb2e6,
> +                                           .data3 = 0x4523,
> +                                           .data4 = { 0x93, 0xef, 0xc3, 0x09,
> +                                                      0xe0, 0x00, 0xc7, 
> 0x46} };
> +
> +
> +static const ms_guid phys_sector_guid =  { .data1 = 0xcda348c7,
> +                                           .data2 = 0x445d,
> +                                           .data3 = 0x4471,
> +                                           .data4 = { 0x9c, 0xc9, 0xe9, 0x88,
> +                                                      0x52, 0x51, 0xc5, 
> 0x56} };
> +
> +static const ms_guid parent_locator_guid = { .data1 = 0xa8d35f2d,
> +                                             .data2 = 0xb30b,
> +                                             .data3 = 0x454d,
> +                                             .data4 = { 0xab, 0xf7, 0xd3,
> +                                                        0xd8, 0x48, 0x34,
> +                                                        0xab, 0x0c} };
> +
> +static const ms_guid logical_sector_guid = { .data1 = 0x8141bf1d,
> +                                             .data2 = 0xa96f,
> +                                             .data3 = 0x4709,
> +                                             .data4 = { 0xba, 0x47, 0xf2,
> +                                                        0x33, 0xa8, 0xfa,
> +                                                        0xab, 0x5f} };
> +
> +/* Each parent type must have a valid GUID; this is for parent images
> + * of type 'VHDX'.  If we were to allow e.g. a QCOW2 parent, we would
> + * need to make up our own QCOW2 GUID type */
> +static const ms_guid parent_vhdx_guid = { .data1 = 0xb04aefb7,
> +                                          .data2 = 0xd19e,
> +                                          .data3 = 0x4a81,
> +                                          .data4 = { 0xb7, 0x89, 0x25, 0xb8,
> +                                                     0xe9, 0x44, 0x59, 0x13} 
> };
> +
> +
> +#define META_FILE_PARAMETER_PRESENT      0x01
> +#define META_VIRTUAL_DISK_SIZE_PRESENT   0x02
> +#define META_PAGE_83_PRESENT             0x04
> +#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
> +#define META_PHYS_SECTOR_SIZE_PRESENT    0x10
> +#define META_PARENT_LOCATOR_PRESENT      0x20
> +
> +#define META_ALL_PRESENT    \
> +    (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
> +     META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
> +     META_PHYS_SECTOR_SIZE_PRESENT)
> +
> +typedef struct vhdx_metadata_entries {
> +    vhdx_metadata_table_entry file_parameters_entry;
> +    vhdx_metadata_table_entry virtual_disk_size_entry;
> +    vhdx_metadata_table_entry page83_data_entry;
> +    vhdx_metadata_table_entry logical_sector_size_entry;
> +    vhdx_metadata_table_entry phys_sector_size_entry;
> +    vhdx_metadata_table_entry parent_locator_entry;
> +    uint16_t present;
> +} vhdx_metadata_entries;

Should everything before this line actually be in the header?

> +
> +typedef struct BDRVVHDXState {
> +    CoMutex lock;
> +
> +    int curr_header;
> +    vhdx_header *headers[2];
> +
> +    vhdx_region_table_header rt;
> +    vhdx_region_table_entry bat_rt;         /* region table for the BAT */
> +    vhdx_region_table_entry metadata_rt;    /* region table for the metadata 
> */
> +
> +    vhdx_metadata_table_header  metadata_hdr;
> +    vhdx_metadata_entries metadata_entries;
> +
> +    vhdx_file_parameters params;
> +    uint32_t block_size;
> +    uint32_t block_size_bits;
> +    uint32_t sectors_per_block;
> +    uint32_t sectors_per_block_bits;
> +
> +    uint64_t virtual_disk_size;
> +    uint32_t logical_sector_size;
> +    uint32_t physical_sector_size;
> +
> +    uint64_t chunk_ratio;
> +    uint32_t chunk_ratio_bits;
> +    uint32_t logical_sector_size_bits;
> +
> +    uint32_t bat_entries;
> +    vhdx_bat_entry *bat;
> +    uint64_t bat_offset;
> +
> +    vhdx_parent_locator_header parent_header;
> +    vhdx_parent_locator_entry *parent_entries;
> +
> +} BDRVVHDXState;
> +
> +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
> +                            int crc_offset)
> +{
> +    uint32_t crc_new;
> +    uint32_t crc_orig;
> +    assert(buf != NULL);
> +
> +    if (crc_offset > 0) {
> +        memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));

Please add spaces before and after operators. I saw this a lot in this
series. I'm relatively sure that you can use checkpatch.pl to get all
instances pointed out.

> +        memset(buf+crc_offset, 0, sizeof(crc_orig));
> +    }
> +
> +    crc_new = crc32c(crc, buf, size);
> +    if (crc_offset > 0) {
> +        memcpy(buf+crc_offset, &crc_orig, sizeof(crc_orig));
> +    }
> +
> +    return crc_new;
> +}
> +
> +/* Validates the checksum of the buffer, with an in-place CRC.
> + *
> + * Zero is substituted during crc calculation for the original crc field,
> + * and the crc field is restored afterwards.  But the buffer will be modifed
> + * during the calculation, so this may not be not suitable for multi-threaded
> + * use.
> + *
> + * crc_offset: byte offset in buf of the buffer crc
> + * buf: buffer pointer
> + * size: size of buffer (must be > crc_offset+4)
> + *
> + * returns true if checksum is valid, false otherwise
> + */
> +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
> +{
> +    uint32_t crc_orig;
> +    uint32_t crc;
> +
> +    assert(buf != NULL);
> +    assert(size > (crc_offset+4));
> +
> +    memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));
> +    crc_orig = le32_to_cpu(crc_orig);
> +
> +    crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset);
> +
> +    return crc == crc_orig;
> +}
> +
> +
> +/*
> + * Per the MS VHDX Specification, for every VHDX file:
> + *      - The header section is fixed size - 1 MB
> + *      - The header section is always the first "object"
> + *      - The first 64KB of the header is the File Identifier
> + *      - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
> + *      - The following 512 bytes constitute a UTF-16 string identifiying the
> + *        software that created the file, and is optional and diagnostic 
> only.
> + *
> + *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
> + */
> +static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
> +{
> +    if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {

There's a VHDX_FILE_ID_MAGIC constant in the header. Don't you want to
use it?

> +        return 100;
> +    }
> +    return 0;
> +}
> +
> +/* All VHDX structures on disk are little endian */
> +static void vhdx_header_le_import(vhdx_header *h)
> +{
> +    assert(h != NULL);
> +
> +    le32_to_cpus(&h->signature);
> +    le32_to_cpus(&h->checksum);
> +    le64_to_cpus(&h->sequence_number);
> +
> +    leguid_to_cpus(&h->file_write_guid);
> +    leguid_to_cpus(&h->data_write_guid);
> +    leguid_to_cpus(&h->log_guid);
> +
> +    le16_to_cpus(&h->log_version);
> +    le16_to_cpus(&h->version);
> +    le32_to_cpus(&h->log_length);
> +    le64_to_cpus(&h->log_offset);
> +}
> +
> +
> +/* opens the specified header block from the VHDX file header section */
> +static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
> +{
> +    int ret = 0;
> +    vhdx_header *header1;
> +    vhdx_header *header2;
> +    uint64_t h1_seq = 0;
> +    uint64_t h2_seq = 0;
> +    uint8_t *buffer;
> +
> +    header1 = qemu_blockalign(bs, sizeof(vhdx_header));
> +    header2 = qemu_blockalign(bs, sizeof(vhdx_header));
> +
> +    buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
> +
> +    s->headers[0] = header1;
> +    s->headers[1] = header2;
> +
> +    /* We have to read the whole VHDX_HEADER_SIZE instead of
> +     * sizeof(vhdx_header), because the checksum is over the whole
> +     * region */
> +    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, 
> VHDX_HEADER_SIZE);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    /* copy over just the relevant portion that we need */
> +    memcpy(header1, buffer, sizeof(vhdx_header));
> +    vhdx_header_le_import(header1);
> +
> +    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> +        header1->signature == VHDX_HDR_MAGIC) {
> +        h1_seq = header1->sequence_number;
> +    }
> +
> +    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, 
> VHDX_HEADER_SIZE);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    /* copy over just the relevant portion that we need */
> +    memcpy(header2, buffer, sizeof(vhdx_header));
> +    vhdx_header_le_import(header2);
> +
> +    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> +        header2->signature == VHDX_HDR_MAGIC) {
> +        h2_seq = header2->sequence_number;
> +    }
> +
> +    if (h1_seq > h2_seq) {
> +        s->curr_header = 0;
> +    } else if (h2_seq > h1_seq) {
> +        s->curr_header = 1;
> +    } else {
> +        printf("NO VALID HEADER\n");

Make that an qerror_report() at least.

> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    ret = 0;
> +
> +    goto exit;
> +
> +fail:
> +    qemu_vfree(header1);
> +    qemu_vfree(header2);
> +    s->headers[0] = NULL;
> +    s->headers[1] = NULL;
> +exit:
> +    qemu_vfree(buffer);
> +    return ret;
> +}
> +
> +
> +static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
> +{
> +    int ret = 0;
> +    uint8_t *buffer;
> +    int offset = 0;
> +    vhdx_region_table_entry rt_entry;
> +    int i;
> +
> +    /* We have to read the whole 64KB block, because the crc32 is over the
> +     * whole block */
> +    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
> +
> +    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
> +                    VHDX_HEADER_BLOCK_SIZE);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    memcpy(&s->rt, buffer, sizeof(s->rt));
> +    le32_to_cpus(&s->rt.signature);
> +    le32_to_cpus(&s->rt.checksum);
> +    le32_to_cpus(&s->rt.entry_count);
> +    le32_to_cpus(&s->rt.reserved);
> +    offset += sizeof(s->rt);
> +
> +    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
> +        s->rt.signature != VHDX_RT_MAGIC) {
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    for (i = 0; i < s->rt.entry_count; i++) {
> +        memcpy(&rt_entry, buffer+offset, sizeof(rt_entry));
> +        offset += sizeof(rt_entry);
> +
> +        leguid_to_cpus(&rt_entry.guid);
> +        le64_to_cpus(&rt_entry.file_offset);
> +        le32_to_cpus(&rt_entry.length);
> +        le32_to_cpus(&rt_entry.data_bits);
> +
> +        /* see if we recognize the entry */
> +        if (guid_eq(rt_entry.guid, bat_guid)) {
> +            s->bat_rt = rt_entry;
> +            continue;
> +        }
> +
> +        if (guid_eq(rt_entry.guid, metadata_guid)) {
> +            s->metadata_rt = rt_entry;
> +            continue;
> +        }
> +
> +        if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
> +            /* cannot read vhdx file - required region table entry that
> +             * we do not understand.  per spec, we must fail to open */
> +            ret = -ENOTSUP;
> +            goto fail;
> +        }
> +    }
> +    ret = 0;
> +
> +fail:
> +    qemu_vfree(buffer);
> +    return ret;
> +}
> +
> +
> +
> +/* Metadata initial parser
> + *
> + * This loads all the metadata entry fields.  This may cause additional
> + * fields to be processed (e.g. parent locator, etc..).
> + *
> + * There are 5 Metadata items that are always required:
> + *      - File Parameters (block size, has a parent)
> + *      - Virtual Disk Size (size, in bytes, of the virtual drive)
> + *      - Page 83 Data (scsi page 83 guid)
> + *      - Logical Sector Size (logical sector size in bytes, either 512 or
> + *                             4096.  We only support 512 currently)
> + *      - Physical Sector Size (512 or 4096)
> + *
> + * Also, if the File Parameters indicate this is a differencing file,
> + * we must also look for the Parent Locator metadata item.
> + */
> +static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
> +{
> +    int ret = 0;
> +    uint8_t *buffer;
> +    int offset = 0;
> +    int i = 0;
> +    uint32_t block_size, sectors_per_block, logical_sector_size;
> +    uint64_t chunk_ratio;
> +    vhdx_metadata_table_entry md_entry;
> +
> +    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
> +
> +    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
> +                     VHDX_METADATA_TABLE_MAX_SIZE);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
> +    offset += sizeof(s->metadata_hdr);
> +
> +    le64_to_cpus(&s->metadata_hdr.signature);
> +    le16_to_cpus(&s->metadata_hdr.reserved);
> +    le16_to_cpus(&s->metadata_hdr.entry_count);
> +
> +    if (s->metadata_hdr.signature != VHDX_METADATA_MAGIC) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    s->metadata_entries.present = 0;
> +
> +    if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
> +        (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    for (i = 0; i < s->metadata_hdr.entry_count; i++) {
> +        memcpy(&md_entry, buffer+offset, sizeof(md_entry));
> +        offset += sizeof(md_entry);
> +
> +        leguid_to_cpus(&md_entry.item_id);
> +        le32_to_cpus(&md_entry.offset);
> +        le32_to_cpus(&md_entry.length);
> +        le32_to_cpus(&md_entry.data_bits);
> +        le32_to_cpus(&md_entry.reserved2);
> +
> +        if (guid_eq(md_entry.item_id, file_param_guid)) {
> +            s->metadata_entries.file_parameters_entry = md_entry;
> +            s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, virtual_size_guid)) {
> +            s->metadata_entries.virtual_disk_size_entry = md_entry;
> +            s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, page83_guid)) {
> +            s->metadata_entries.page83_data_entry = md_entry;
> +            s->metadata_entries.present |= META_PAGE_83_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, logical_sector_guid)) {
> +            s->metadata_entries.logical_sector_size_entry = md_entry;
> +            s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, phys_sector_guid)) {
> +            s->metadata_entries.phys_sector_size_entry = md_entry;
> +            s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, parent_locator_guid)) {
> +            s->metadata_entries.parent_locator_entry = md_entry;
> +            s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
> +            continue;
> +        }
> +
> +        if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
> +            /* cannot read vhdx file - required region table entry that
> +             * we do not understand.  per spec, we must fail to open */
> +            ret = -ENOTSUP;
> +            goto exit;
> +        }
> +    }
> +
> +    if (s->metadata_entries.present != META_ALL_PRESENT) {
> +        ret = -ENOTSUP;
> +        goto exit;
> +    }
> +
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.file_parameters_entry.offset
> +                                         + s->metadata_rt.file_offset,
> +                     &s->params,
> +                     sizeof(s->params));
> +
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    le32_to_cpus(&s->params.block_size);
> +    le32_to_cpus(&s->params.data_bits);
> +
> +
> +    /* We now have the file parameters, so we can tell if this is a
> +     * differencing file (i.e.. has_parent), is dynamic or fixed
> +     * sized (leave_blocks_allocated), and the block size */
> +
> +    /* The parent locator required iff the file parameters has_parent set */
> +    if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
> +        if (s->metadata_entries.present & ~META_PARENT_LOCATOR_PRESENT) {
> +            /* TODO: parse  parent locator fields */
> +            ret = -ENOTSUP; /* temp, until differencing files are supported 
> */
> +            goto exit;
> +        } else {
> +            /* if has_parent is set, but there is not parent locator present,
> +             * then that is an invalid combination */
> +            ret = -EINVAL;
> +            goto exit;
> +        }
> +    }
> +
> +    /* determine virtual disk size, logical sector size,
> +     * and phys sector size */
> +
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.virtual_disk_size_entry.offset
> +                                           + s->metadata_rt.file_offset,
> +                     &s->virtual_disk_size,
> +                     sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.logical_sector_size_entry.offset
> +                                             + s->metadata_rt.file_offset,
> +                     &s->logical_sector_size,
> +                     sizeof(uint32_t));
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.phys_sector_size_entry.offset
> +                                          + s->metadata_rt.file_offset,
> +                     &s->physical_sector_size,
> +                     sizeof(uint32_t));
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    le64_to_cpus(&s->virtual_disk_size);
> +    le32_to_cpus(&s->logical_sector_size);
> +    le32_to_cpus(&s->physical_sector_size);
> +
> +    if (s->logical_sector_size == 0 || s->params.block_size == 0) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    /* both block_size and sector_size are guaranteed powers of 2 */
> +    s->sectors_per_block = s->params.block_size / s->logical_sector_size;
> +    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
> +                     (uint64_t)s->logical_sector_size /
> +                     (uint64_t)s->params.block_size;
> +
> +    /* These values are ones we will want to use for division / 
> multiplication
> +     * later on, and they are all guaranteed (per the spec) to be powers of 
> 2,
> +     * so we can take advantage of that for shift operations during
> +     * reads/writes */
> +    logical_sector_size = s->logical_sector_size;
> +    if (logical_sector_size & (logical_sector_size - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    sectors_per_block = s->sectors_per_block;
> +    if (sectors_per_block & (sectors_per_block - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +    chunk_ratio = s->chunk_ratio;
> +    if (chunk_ratio & (chunk_ratio - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +    block_size = s->params.block_size;
> +    if (block_size & (block_size - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    while (logical_sector_size >>= 1) {
> +        s->logical_sector_size_bits++;
> +    }
> +    while (sectors_per_block >>= 1) {
> +        s->sectors_per_block_bits++;
> +    }
> +    while (chunk_ratio >>= 1) {
> +        s->chunk_ratio_bits++;
> +    }
> +    while (block_size >>= 1) {
> +        s->block_size_bits++;
> +    }
> +
> +    if (s->logical_sector_size != BDRV_SECTOR_SIZE) {
> +        printf("VHDX error - QEMU only supports 512 byte sector sizes\n");
> +        ret = -ENOTSUP;
> +        goto exit;
> +    }
> +
> +    ret = 0;
> +
> +exit:
> +    qemu_vfree(buffer);
> +    return ret;
> +}
> +
> +/* Parse the replay log.  Per the VHDX spec, if the log is present
> + * it must be replayed prior to opening the file, even read-only.
> + *
> + * If read-only, we must replay the log in RAM (or refuse to open
> + * a dirty VHDX file read-only */
> +static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s)
> +{
> +    int ret = 0;
> +    int i;
> +    vhdx_header *hdr;
> +
> +    hdr = s->headers[s->curr_header];
> +
> +    /* either either the log guid, or log length is zero,
> +     * then a replay log is present */
> +    for (i = 0; i < sizeof(hdr->log_guid.data4); i++) {
> +        ret |= hdr->log_guid.data4[i];
> +    }
> +    if (hdr->log_guid.data1 == 0 &&
> +        hdr->log_guid.data2 == 0 &&
> +        hdr->log_guid.data3 == 0 &&
> +        ret == 0) {
> +        goto exit;
> +    }
> +
> +    /* per spec, only log version of 0 is supported */
> +    if (hdr->log_version != 0) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    if (hdr->log_length == 0) {
> +        goto exit;
> +    }
> +
> +    /* We currently do not support images with logs to replay */
> +    ret = -ENOTSUP;
> +
> +exit:
> +    return ret;
> +}
> +
> +
> +static int vhdx_open(BlockDriverState *bs, QDict *options, int flags)
> +{
> +    BDRVVHDXState *s = bs->opaque;
> +    int ret = 0;
> +    int i;
> +
> +    s->bat = NULL;
> +
> +    qemu_co_mutex_init(&s->lock);
> +
> +    ret = vhdx_parse_header(bs, s);
> +    if (ret) {
> +        goto fail;
> +    }
> +
> +    ret = vhdx_parse_log(bs, s);
> +    if (ret) {
> +        goto fail;
> +    }
> +
> +    ret = vhdx_open_region_tables(bs, s);
> +    if (ret) {
> +        goto fail;
> +    }
> +
> +    ret = vhdx_parse_metadata(bs, s);
> +    if (ret) {
> +        goto fail;
> +    }
> +    s->block_size = s->params.block_size;
> +
> +    /* the VHDX spec dictates that virtual_disk_size is always a multiple of
> +     * logical_sector_size */
> +    bs->total_sectors = s->virtual_disk_size / s->logical_sector_size;
> +
> +    s->bat_offset = s->bat_rt.file_offset;
> +    s->bat_entries = s->bat_rt.length / sizeof(vhdx_bat_entry);
> +    s->bat = qemu_blockalign(bs, s->bat_rt.length);
> +
> +    ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);

No error checking?

> +    for (i = 0; i < s->bat_entries; i++) {
> +        le64_to_cpus(&s->bat[i]);
> +    }
> +
> +    if (flags & BDRV_O_RDWR) {
> +        ret = -ENOTSUP;
> +        goto fail;
> +    }
> +
> +    /* TODO: differencing files, read, write */
> +
> +    return 0;
> +fail:
> +    qemu_vfree(s->bat);

This doesn't consider all the structs that were allocated by functions
called in vhdx_open(). Here the same things should be freed as in
vhdx_close().

> +    return ret;
> +}
> +
> +static int vhdx_reopen_prepare(BDRVReopenState *state,
> +                               BlockReopenQueue *queue, Error **errp)
> +{
> +    return 0;
> +}
> +
> +
> +static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t 
> sector_num,
> +                                      int nb_sectors, QEMUIOVector *qiov)
> +{
> +    return -ENOTSUP;
> +}
> +
> +
> +
> +static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t 
> sector_num,
> +                                      int nb_sectors, QEMUIOVector *qiov)
> +{
> +    return -ENOTSUP;
> +}
> +
> +
> +static void vhdx_close(BlockDriverState *bs)
> +{
> +    BDRVVHDXState *s = bs->opaque;
> +
> +    qemu_vfree(s->headers[0]);
> +    qemu_vfree(s->headers[1]);
> +    qemu_vfree(s->bat);
> +    qemu_vfree(s->parent_entries);
> +}
> +
> +static BlockDriver bdrv_vhdx = {
> +    .format_name    = "vhdx",
> +    .instance_size  = sizeof(BDRVVHDXState),

Use the same alignment for = as below?

> +    .bdrv_probe             = vhdx_probe,
> +    .bdrv_open              = vhdx_open,
> +    .bdrv_close             = vhdx_close,
> +    .bdrv_reopen_prepare    = vhdx_reopen_prepare,
> +    .bdrv_co_readv          = vhdx_co_readv,
> +    .bdrv_co_writev         = vhdx_co_writev,
> +};

Kevin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]