[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] Re: [PATCH v4 2/5] qed: Add QEMU Enhanced Disk image format
From: |
Kevin Wolf |
Subject: |
[Qemu-devel] Re: [PATCH v4 2/5] qed: Add QEMU Enhanced Disk image format |
Date: |
Fri, 12 Nov 2010 16:43:01 +0100 |
User-agent: |
Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.15) Gecko/20101027 Fedora/3.0.10-1.fc12 Thunderbird/3.0.10 |
Am 28.10.2010 13:01, schrieb Stefan Hajnoczi:
> This patch introduces the qed on-disk layout and implements image
> creation. Later patches add read/write and other functionality.
>
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> ---
> Makefile.objs | 1 +
> block/qed.c | 548
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> block/qed.h | 145 +++++++++++++++
> block_int.h | 1 +
> 4 files changed, 695 insertions(+), 0 deletions(-)
> create mode 100644 block/qed.c
> create mode 100644 block/qed.h
>
> diff --git a/Makefile.objs b/Makefile.objs
> index f07fb01..7bae72a 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -14,6 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>
> block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
> block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
> +block-nested-y += qed.o
> block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
> block-nested-$(CONFIG_WIN32) += raw-win32.o
> block-nested-$(CONFIG_POSIX) += raw-posix.o
> diff --git a/block/qed.c b/block/qed.c
> new file mode 100644
> index 0000000..8469cf0
> --- /dev/null
> +++ b/block/qed.c
> @@ -0,0 +1,548 @@
> +/*
> + * QEMU Enhanced Disk Format
> + *
> + * Copyright IBM, Corp. 2010
> + *
> + * Authors:
> + * Stefan Hajnoczi <address@hidden>
> + * Anthony Liguori <address@hidden>
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
> + * See the COPYING.LIB file in the top-level directory.
> + *
> + */
> +
> +#include "qed.h"
> +
> +static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
> + const char *filename)
> +{
> + const QEDHeader *header = (const QEDHeader *)buf;
> +
> + if (buf_size < sizeof(*header)) {
> + return 0;
> + }
> + if (le32_to_cpu(header->magic) != QED_MAGIC) {
> + return 0;
> + }
> + return 100;
> +}
> +
> +/**
> + * Check whether an image format is raw
> + *
> + * @fmt: Backing file format, may be NULL
> + */
> +static bool qed_fmt_is_raw(const char *fmt)
> +{
> + return fmt && strcmp(fmt, "raw") == 0;
> +}
People shouldn't use the protocol-level drivers (file, host_device, etc.)
directly as a backing format, but should we also consider them here?
> +
> +static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
> +{
> + cpu->magic = le32_to_cpu(le->magic);
> + cpu->cluster_size = le32_to_cpu(le->cluster_size);
> + cpu->table_size = le32_to_cpu(le->table_size);
> + cpu->header_size = le32_to_cpu(le->header_size);
> + cpu->features = le64_to_cpu(le->features);
> + cpu->compat_features = le64_to_cpu(le->compat_features);
> + cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
> + cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
> + cpu->image_size = le64_to_cpu(le->image_size);
> + cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
> + cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
> +}
> +
> +static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
> +{
> + le->magic = cpu_to_le32(cpu->magic);
> + le->cluster_size = cpu_to_le32(cpu->cluster_size);
> + le->table_size = cpu_to_le32(cpu->table_size);
> + le->header_size = cpu_to_le32(cpu->header_size);
> + le->features = cpu_to_le64(cpu->features);
> + le->compat_features = cpu_to_le64(cpu->compat_features);
> + le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
> + le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
> + le->image_size = cpu_to_le64(cpu->image_size);
> + le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
> + le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
> +}
> +
> +static int qed_write_header_sync(BDRVQEDState *s)
> +{
> + QEDHeader le;
> + int ret;
> +
> + qed_header_cpu_to_le(&s->header, &le);
> + ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
> + if (ret != sizeof(le)) {
> + return ret;
> + }
> + return 0;
> +}
> +
> +static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
> +{
> + uint64_t table_entries;
> + uint64_t l2_size;
> +
> + table_entries = (table_size * cluster_size) / sizeof(uint64_t);
> + l2_size = table_entries * cluster_size;
> +
> + return l2_size * table_entries;
> +}
> +
> +static bool qed_is_cluster_size_valid(uint32_t cluster_size)
> +{
> + if (cluster_size < QED_MIN_CLUSTER_SIZE ||
> + cluster_size > QED_MAX_CLUSTER_SIZE) {
> + return false;
> + }
> + if (cluster_size & (cluster_size - 1)) {
> + return false; /* not power of 2 */
> + }
> + return true;
> +}
> +
> +static bool qed_is_table_size_valid(uint32_t table_size)
> +{
> + if (table_size < QED_MIN_TABLE_SIZE ||
> + table_size > QED_MAX_TABLE_SIZE) {
> + return false;
> + }
> + if (table_size & (table_size - 1)) {
> + return false; /* not power of 2 */
> + }
> + return true;
> +}
> +
> +static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
> + uint32_t table_size)
> +{
> + if (image_size % BDRV_SECTOR_SIZE != 0) {
> + return false; /* not multiple of sector size */
> + }
> + if (image_size > qed_max_image_size(cluster_size, table_size)) {
> + return false; /* image is too large */
> + }
> + return true;
> +}
> +
> +/**
> + * Read a string of known length from the image file
> + *
> + * @file: Image file
> + * @offset: File offset to start of string, in bytes
> + * @n: String length in bytes
> + * @buf: Destination buffer
> + * @buflen: Destination buffer length in bytes
> + * @ret: 0 on success, -errno on failure
> + *
> + * The string is NUL-terminated.
> + */
> +static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
> + char *buf, size_t buflen)
> +{
> + int ret;
> + if (n >= buflen) {
> + return -EINVAL;
> + }
> + ret = bdrv_pread(file, offset, buf, n);
> + if (ret < 0) {
> + return ret;
> + }
> + buf[n] = '\0';
> + return 0;
> +}
> +
> +static int bdrv_qed_open(BlockDriverState *bs, int flags)
> +{
> + BDRVQEDState *s = bs->opaque;
> + QEDHeader le_header;
> + int64_t file_size;
> + int ret;
> +
> + s->bs = bs;
> +
> + ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
> + if (ret < 0) {
> + return ret;
> + }
> + ret = 0; /* ret should always be 0 or -errno */
> + qed_header_le_to_cpu(&le_header, &s->header);
> +
> + if (s->header.magic != QED_MAGIC) {
> + return -ENOENT;
> + }
ENOENT seems a bit odd for a wrong magic number, especially if it's used
for the error message. Wouldn't EINVAL or ENOTSUP be a closer match?
> + if (s->header.features & ~QED_FEATURE_MASK) {
> + return -ENOTSUP; /* image uses unsupported feature bits */
> + }
> + if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
> + return -EINVAL;
> + }
> +
> + /* Round down file size to the last cluster */
> + file_size = bdrv_getlength(bs->file);
> + if (file_size < 0) {
> + return file_size;
> + }
> + s->file_size = qed_start_of_cluster(s, file_size);
> +
> + if (!qed_is_table_size_valid(s->header.table_size)) {
> + return -EINVAL;
> + }
> + if (!qed_is_image_size_valid(s->header.image_size,
> + s->header.cluster_size,
> + s->header.table_size)) {
> + return -EINVAL;
> + }
> + if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
> + return -EINVAL;
> + }
> +
> + s->table_nelems = (s->header.cluster_size * s->header.table_size) /
> + sizeof(uint64_t);
> + s->l2_shift = ffs(s->header.cluster_size) - 1;
> + s->l2_mask = s->table_nelems - 1;
> + s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
> +
> + if ((s->header.features & QED_F_BACKING_FILE)) {
> + ret = qed_read_string(bs->file, s->header.backing_filename_offset,
> + s->header.backing_filename_size, bs->backing_file,
> + sizeof(bs->backing_file));
> + if (ret < 0) {
> + return ret;
> + }
> +
> + if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
> + pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
> + }
> + }
> +
> + /* Reset unknown autoclear feature bits. This is a backwards
> + * compatibility mechanism that allows images to be opened by older
> + * programs, which "knock out" unknown feature bits. When an image is
> + * opened by a newer program again it can detect that the autoclear
> + * feature is no longer valid.
> + */
> + if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
> + !bdrv_is_read_only(bs->file)) {
> + s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
> +
> + ret = qed_write_header_sync(s);
> + if (ret) {
> + return ret;
> + }
> +
> + /* From here on only known autoclear feature bits are valid */
> + bdrv_flush(bs->file);
> + }
> +
> + return ret;
> +}
> +
> +static void bdrv_qed_close(BlockDriverState *bs)
> +{
> +}
> +
> +static void bdrv_qed_flush(BlockDriverState *bs)
> +{
> + bdrv_flush(bs->file);
> +}
This conflicts with one of my recent changes. bdrv_flush should return
an int now.
> +
> +static int qed_create(const char *filename, uint32_t cluster_size,
> + uint64_t image_size, uint32_t table_size,
> + const char *backing_file, const char *backing_fmt)
> +{
> + QEDHeader header = {
> + .magic = QED_MAGIC,
> + .cluster_size = cluster_size,
> + .table_size = table_size,
> + .header_size = 1,
> + .features = 0,
> + .compat_features = 0,
> + .l1_table_offset = cluster_size,
> + .image_size = image_size,
> + };
> + QEDHeader le_header;
> + uint8_t *l1_table = NULL;
> + size_t l1_size = header.cluster_size * header.table_size;
> + int ret = 0;
> + BlockDriverState *bs = NULL;
> +
> + ret = bdrv_create_file(filename, NULL);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR | BDRV_O_CACHE_WB);
> + if (ret < 0) {
> + return ret;
> + }
> +
> + if (backing_file) {
> + header.features |= QED_F_BACKING_FILE;
> + header.backing_filename_offset = sizeof(le_header);
> + header.backing_filename_size = strlen(backing_file);
> +
> + if (qed_fmt_is_raw(backing_fmt)) {
> + header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
> + }
> + }
> +
> + qed_header_cpu_to_le(&header, &le_header);
> + ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
> + if (ret < 0) {
> + goto out;
> + }
> + ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
> + header.backing_filename_size);
> + if (ret < 0) {
> + goto out;
> + }
> +
> + l1_table = qemu_mallocz(l1_size);
> + ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
> + if (ret < 0) {
> + goto out;
> + }
> +
> + ret = 0; /* success */
> +out:
> + qemu_free(l1_table);
> + bdrv_delete(bs);
> + return ret;
> +}
> +
> +static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
> +{
> + uint64_t image_size = 0;
> + uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
> + uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
> + const char *backing_file = NULL;
> + const char *backing_fmt = NULL;
> +
> + while (options && options->name) {
> + if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
> + image_size = options->value.n;
> + } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
> + backing_file = options->value.s;
> + } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
> + backing_fmt = options->value.s;
I'm not sure here. It doesn't really matter for QED, but I find it
strange that -o backing_fmt=foobar works and gives you a non-raw backing
file. Should we check if the format exists at least?
Kevin
- [Qemu-devel] Re: [PATCH v4 2/5] qed: Add QEMU Enhanced Disk image format,
Kevin Wolf <=