[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v2] blkverify: Add block driver for verifying I/O
From: |
Stefan Hajnoczi |
Subject: |
[Qemu-devel] [PATCH v2] blkverify: Add block driver for verifying I/O |
Date: |
Sat, 4 Sep 2010 09:17:52 +0100 |
The blkverify block driver makes investigating image format data
corruption much easier. A raw image initialized with the same contents
as the test image (e.g. qcow2 file) must be provided. The raw image
mirrors read/write operations and is used to verify that data read from
the test image is correct.
Signed-off-by: Stefan Hajnoczi <address@hidden>
---
v2:
* Implement aio_cancel() by waiting for pending requests
* Fix conflict in Makefile.objs
Makefile.objs | 2 +-
block/blkverify.c | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++
docs/blkverify.txt | 69 +++++++++++
3 files changed, 394 insertions(+), 1 deletions(-)
create mode 100644 block/blkverify.c
create mode 100644 docs/blkverify.txt
diff --git a/Makefile.objs b/Makefile.objs
index 4a1eaa1..b640ec8 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -14,7 +14,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o
vvfat.o
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
block-nested-$(CONFIG_CURL) += curl.o
diff --git a/block/blkverify.c b/block/blkverify.c
new file mode 100644
index 0000000..127c670
--- /dev/null
+++ b/block/blkverify.c
@@ -0,0 +1,324 @@
+/*
+ * Block protocol for block driver correctness testing
+ *
+ * Copyright (C) 2010 IBM, Corp.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include "block_int.h"
+
+typedef struct {
+ BlockDriverState *test_file;
+} BDRVBlkverifyState;
+
+typedef struct BlkverifyAIOCB BlkverifyAIOCB;
+struct BlkverifyAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+
+ /* Request metadata */
+ bool is_write;
+ int64_t sector_num;
+ int nb_sectors;
+
+ int ret; /* first completed request's result */
+ unsigned int done; /* completion counter */
+
+ QEMUIOVector *qiov; /* user I/O vector */
+ QEMUIOVector raw_qiov; /* cloned I/O vector for raw file */
+ void *buf; /* buffer for raw file I/O */
+
+ void (*verify)(BlkverifyAIOCB *acb);
+};
+
+static void blkverify_aio_cancel_cb(void *opaque, int ret)
+{
+ *(bool *)opaque = true;
+}
+
+static void blkverify_aio_cancel(BlockDriverAIOCB *acb)
+{
+ bool done = false;
+
+ /* Allow the request to complete so the raw file and test file stay in
+ * sync. Hijack the callback (since the request is cancelled anyway) to
+ * wait for completion.
+ */
+ acb->common.cb = blkverify_aio_cancel_cb;
+ acb->common.opaque = &done;
+ while (!done) {
+ qemu_aio_wait();
+ }
+}
+
+static AIOPool blkverify_aio_pool = {
+ .aiocb_size = sizeof(BlkverifyAIOCB),
+ .cancel = blkverify_aio_cancel,
+};
+
+static void blkverify_err(BlkverifyAIOCB *acb, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "blkverify: %s sector_num=%ld nb_sectors=%d ",
+ acb->is_write ? "write" : "read", acb->sector_num,
+ acb->nb_sectors);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ exit(1);
+}
+
+/* Valid blkverify filenames look like
blkverify:path/to/raw_image:path/to/image */
+static int blkverify_open(BlockDriverState *bs, const char *filename, int
flags)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ int ret;
+ char *raw, *c;
+
+ /* Parse the blkverify: prefix */
+ if (strncmp(filename, "blkverify:", strlen("blkverify:"))) {
+ return -EINVAL;
+ }
+ filename += strlen("blkverify:");
+
+ /* Parse the raw image filename */
+ c = strchr(filename, ':');
+ if (c == NULL) {
+ return -EINVAL;
+ }
+
+ raw = strdup(filename);
+ raw[c - filename] = '\0';
+ ret = bdrv_file_open(&bs->file, raw, flags);
+ free(raw);
+ if (ret < 0) {
+ return ret;
+ }
+ filename = c + 1;
+
+ /* Open the test file */
+ s->test_file = bdrv_new("");
+ ret = bdrv_open(s->test_file, filename, flags, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void blkverify_close(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_delete(s->test_file);
+ s->test_file = NULL;
+}
+
+static void blkverify_flush(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ bdrv_flush(s->test_file);
+}
+
+static int64_t blkverify_getlength(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ return bdrv_getlength(s->test_file);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * @a: I/O vector
+ * @b: I/O vector
+ * @ret: Offset to first mismatching byte or -1 if match
+ */
+static ssize_t blkverify_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+ int i;
+ ssize_t offset = 0;
+
+ assert(a->niov == b->niov);
+ for (i = 0; i < a->niov; i++) {
+ size_t len = 0;
+ uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+ uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+ assert(a->iov[i].iov_len == b->iov[i].iov_len);
+ while (len < a->iov[i].iov_len && *p++ == *q++) {
+ len++;
+ }
+
+ offset += len;
+
+ if (len != a->iov[i].iov_len) {
+ return offset;
+ }
+ }
+ return -1;
+}
+
+/**
+ * Copy contents of I/O vector
+ */
+static void blkverify_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src,
+ void *buf)
+{
+ int i;
+
+ for (i = 0; i < src->niov; i++) {
+ qemu_iovec_add(dest, buf, src->iov[i].iov_len);
+ buf += src->iov[i].iov_len;
+ }
+}
+
+static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
+ int64_t sector_num, QEMUIOVector
*qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aio_pool, bs, cb, opaque);
+
+ acb->bh = NULL;
+ acb->is_write = is_write;
+ acb->sector_num = sector_num;
+ acb->nb_sectors = nb_sectors;
+ acb->ret = -EINPROGRESS;
+ acb->done = 0;
+ acb->qiov = qiov;
+ acb->buf = NULL;
+ acb->verify = NULL;
+ return acb;
+}
+
+static void blkverify_aio_bh(void *opaque)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ qemu_bh_delete(acb->bh);
+ if (acb->buf) {
+ qemu_iovec_destroy(&acb->raw_qiov);
+ qemu_vfree(acb->buf);
+ }
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_aio_release(acb);
+}
+
+static void blkverify_aio_cb(void *opaque, int ret)
+{
+ BlkverifyAIOCB *acb = opaque;
+
+ switch (++acb->done) {
+ case 1:
+ acb->ret = ret;
+ break;
+
+ case 2:
+ if (acb->ret != ret) {
+ blkverify_err(acb, "return value mismatch %d != %d", acb->ret,
ret);
+ }
+
+ if (acb->verify) {
+ acb->verify(acb);
+ }
+
+ acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ break;
+ }
+}
+
+static void blkverify_verify_readv(BlkverifyAIOCB *acb)
+{
+ ssize_t offset = blkverify_iovec_compare(acb->qiov, &acb->raw_qiov);
+ if (offset != -1) {
+ blkverify_err(acb, "contents mismatch in sector %ld",
+ acb->sector_num + (offset / BDRV_SECTOR_SIZE));
+ }
+}
+
+static BlockDriverAIOCB *blkverify_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ acb->verify = blkverify_verify_readv;
+ acb->buf = qemu_blockalign(bs->file, qiov->size);
+ qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
+ blkverify_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
+
+ if (!bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+ BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
+ nb_sectors, cb, opaque);
+
+ if (!bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ if (!bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+ blkverify_aio_cb, acb)) {
+ blkverify_aio_cb(acb, -EIO);
+ }
+ return &acb->common;
+}
+
+static BlockDriverAIOCB *blkverify_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ /* Only flush test file, the raw file is not important */
+ return bdrv_aio_flush(s->test_file, cb, opaque);
+}
+
+static BlockDriver bdrv_blkverify = {
+ .format_name = "blkverify",
+ .protocol_name = "blkverify",
+
+ .instance_size = sizeof(BDRVBlkverifyState),
+
+ .bdrv_getlength = blkverify_getlength,
+
+ .bdrv_file_open = blkverify_open,
+ .bdrv_close = blkverify_close,
+ .bdrv_flush = blkverify_flush,
+
+ .bdrv_aio_readv = blkverify_aio_readv,
+ .bdrv_aio_writev = blkverify_aio_writev,
+ .bdrv_aio_flush = blkverify_aio_flush,
+};
+
+static void bdrv_blkverify_init(void)
+{
+ bdrv_register(&bdrv_blkverify);
+}
+
+block_init(bdrv_blkverify_init);
diff --git a/docs/blkverify.txt b/docs/blkverify.txt
new file mode 100644
index 0000000..d556dc4
--- /dev/null
+++ b/docs/blkverify.txt
@@ -0,0 +1,69 @@
+= Block driver correctness testing with blkverify =
+
+== Introduction ==
+
+This document describes how to use the blkverify protocol to test that a block
+driver is operating correctly.
+
+It is difficult to test and debug block drivers against real guests. Often
+processes inside the guest will crash because corrupt sectors were read as part
+of the executable. Other times obscure errors are raised by a program inside
+the guest. These issues are extremely hard to trace back to bugs in the block
+driver.
+
+Blkverify solves this problem by catching data corruption inside QEMU the first
+time bad data is read and reporting the disk sector that is corrupted.
+
+== How it works ==
+
+The blkverify protocol has two child block devices, the "test" device and the
+"raw" device. Read/write operations are mirrored to both devices so their
+state should always be in sync.
+
+The "raw" device is a raw image, a flat file, that has identical starting
+contents to the "test" image. The idea is that the "raw" device will handle
+read/write operations correctly and not corrupt data. It can be used as a
+reference for comparison against the "test" device.
+
+After a mirrored read operation completes, blkverify will compare the data and
+raise an error if it is not identical. This makes it possible to catch the
+first instance where corrupt data is read.
+
+== Example ==
+
+Imagine raw.img has 0xcd repeated throughout its first sector:
+
+ $ ./qemu-io -c 'read -v 0 512' raw.img
+ 00000000: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd
................
+ 00000010: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd
................
+ [...]
+ 000001e0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd
................
+ 000001f0: cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd
................
+ read 512/512 bytes at offset 0
+ 512.000000 bytes, 1 ops; 0.0000 sec (97.656 MiB/sec and 200000.0000
ops/sec)
+
+And test.img is corrupt, its first sector is zeroed when it shouldn't be:
+
+ $ ./qemu-io -c 'read -v 0 512' test.img
+ 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
................
+ 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
................
+ [...]
+ 000001e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
................
+ 000001f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
................
+ read 512/512 bytes at offset 0
+ 512.000000 bytes, 1 ops; 0.0000 sec (81.380 MiB/sec and 166666.6667
ops/sec)
+
+This error is caught by blkverify:
+
+ $ ./qemu-io -c 'read 0 512' blkverify:a.img:b.img
+ blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0
+
+A more realistic scenario is verifying the installation of a guest OS:
+
+ $ ./qemu-img create raw.img 16G
+ $ ./qemu-img create -f qcow2 test.qcow2 16G
+ $ x86_64-softmmu/qemu-system-x86_64 -cdrom debian.iso \
+ -drive
file=blkverify:raw.img:test.qcow2
+
+If the installation is aborted when blkverify detects corruption, use qemu-io
+to explore the contents of the disk image at the sector in question.
--
1.7.1
- [Qemu-devel] [PATCH v2] blkverify: Add block driver for verifying I/O,
Stefan Hajnoczi <=