qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroe


From: Peter Lieven
Subject: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
Date: Wed, 7 May 2014 02:01:40 +0200

this patch tries to optimize zero write requests
by automatically using bdrv_write_zeroes if it is
supported by the format.

this should significantly speed up file system initialization and
should speed zero write test used to test backend storage performance.

the difference can simply be tested by e.g.

   dd if=/dev/zero of=/dev/vdX bs=1M

or

   mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX

Signed-off-by: Peter Lieven <address@hidden>
---
v2->v3: - moved parameter parsing to blockdev_init
        - added per device detect_zeroes status to 
          hmp (info block -v) and qmp (query-block) [Eric]
        - added support to enable detect-zeroes also
          for hot added devices [Eric]
        - added missing entry to qemu_common_drive_opts
        - fixed description of qemu_iovec_is_zero [Fam]

v1->v2: - added tests to commit message (Markus)
RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
           - fixed typo (choosen->chosen) (Eric)
           - added second example to commit msg

RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
              - call zero detection only for format (bs->file != NULL)

 block.c                   |   11 ++++++++++
 block/qapi.c              |   11 ++++++++++
 blockdev.c                |   28 +++++++++++++++++++++++++
 hmp.c                     |    6 ++++++
 include/block/block_int.h |   12 +++++++++++
 include/qemu-common.h     |    1 +
 qapi-schema.json          |   50 +++++++++++++++++++++++++++++++--------------
 qemu-options.hx           |    6 ++++++
 qmp-commands.hx           |    3 +++
 util/iov.c                |   21 +++++++++++++++++++
 10 files changed, 134 insertions(+), 15 deletions(-)

diff --git a/block.c b/block.c
index b749d31..f27b35d 100644
--- a/block.c
+++ b/block.c
@@ -3244,6 +3244,17 @@ static int coroutine_fn 
bdrv_aligned_pwritev(BlockDriverState *bs,
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
+    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
+        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        /* if the device was not opened with discard=on the below flag
+         * is immediately cleared again in bdrv_co_do_write_zeroes */
+        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
diff --git a/block/qapi.c b/block/qapi.c
index af11445..fbf66c2 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
     info->backing_file_depth = bdrv_get_backing_file_depth(bs);
 
+    switch (bs->detect_zeroes) {
+    case BDRV_DETECT_ZEROES_ON:
+        info->detect_zeroes = g_strdup("on");
+        break;
+    case BDRV_DETECT_ZEROES_UNMAP:
+        info->detect_zeroes = g_strdup("unmap");
+        break;
+    default:
+        info->detect_zeroes = g_strdup("off");
+    }
+
     if (bs->io_limits_enabled) {
         ThrottleConfig cfg;
         throttle_get_config(&bs->throttle_state, &cfg);
diff --git a/blockdev.c b/blockdev.c
index 7810e9f..96c11fd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf, bool 
is_read, Error **errp)
     }
 }
 
+static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
+{
+    if (!buf || !strcmp(buf, "off")) {
+        return BDRV_DETECT_ZEROES_OFF;
+    } else if (!strcmp(buf, "on")) {
+        return BDRV_DETECT_ZEROES_ON;
+    } else if (!strcmp(buf, "unmap")) {
+        return BDRV_DETECT_ZEROES_UNMAP;
+    } else {
+        error_setg(errp, "invalid value for detect-zeroes: %s",
+                   buf);
+    }
+    return BDRV_DETECT_ZEROES_OFF;
+}
+
 static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 {
     if (throttle_conflicting(cfg)) {
@@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict 
*bs_opts,
     QemuOpts *opts;
     const char *id;
     bool has_driver_specific_opts;
+    BdrvDetectZeroes detect_zeroes;
     BlockDriver *drv = NULL;
 
     /* Check common options by copying from bs_opts to opts, all other options
@@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict 
*bs_opts,
         }
     }
 
+    detect_zeroes =
+        parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
+    if (error) {
+        error_propagate(errp, error);
+        goto early_err;
+    }
+
     /* init */
     dinfo = g_malloc0(sizeof(*dinfo));
     dinfo->id = g_strdup(qemu_opts_id(opts));
@@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict 
*bs_opts,
     }
     dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
     dinfo->bdrv->read_only = ro;
+    dinfo->bdrv->detect_zeroes = detect_zeroes;
     dinfo->refcount = 1;
     if (serial != NULL) {
         dinfo->serial = g_strdup(serial);
@@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
             .help = "copy read data from backing file into image file",
+        },{
+            .name = "detect-zeroes",
+            .type = QEMU_OPT_STRING,
+            .help = "try to optimize zero writes",
         },
         { /* end of list */ }
     },
diff --git a/hmp.c b/hmp.c
index ca869ba..b1942ed 100644
--- a/hmp.c
+++ b/hmp.c
@@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
                            info->value->inserted->backing_file_depth);
         }
 
+        if (verbose) {
+            monitor_printf(mon,
+                           "    Detect zeroes:    %s\n",
+                           info->value->inserted->detect_zeroes);
+        }
+
         if (info->value->inserted->bps
             || info->value->inserted->bps_rd
             || info->value->inserted->bps_wr
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ffcb69..7b9ca05 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -271,6 +271,17 @@ typedef struct BlockLimits {
 } BlockLimits;
 
 /*
+ * Different operation modes for automatic zero detection
+ * to speed the write operation up with bdrv_write_zeroes.
+ */
+typedef enum {
+    BDRV_DETECT_ZEROES_OFF   = 0x0,
+    BDRV_DETECT_ZEROES_ON    = 0x1,
+    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
+    BDRV_DETECT_ZEROES_UNMAP = 0x2,
+} BdrvDetectZeroes;
+
+/*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
  * inspect bdrv_append() to determine if the new fields need to be
@@ -364,6 +375,7 @@ struct BlockDriverState {
     BlockJob *job;
 
     QDict *options;
+    BdrvDetectZeroes detect_zeroes;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index a998e8d..8e3d6eb 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
 void qemu_iovec_concat_iov(QEMUIOVector *dst,
                            struct iovec *src_iov, unsigned int src_cnt,
                            size_t soffset, size_t sbytes);
+bool qemu_iovec_is_zero(QEMUIOVector *qiov);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/qapi-schema.json b/qapi-schema.json
index 0b00427..5e3b4a89 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -937,6 +937,8 @@
 # @encryption_key_missing: true if the backing device is encrypted but an
 #                          valid encryption key is missing
 #
+# @detect_zeroes: detect and optimize zero writes (Since 2.1)
+#
 # @bps: total throughput limit in bytes per second is specified
 #
 # @bps_rd: read throughput limit in bytes per second is specified
@@ -972,6 +974,7 @@
   'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
             '*backing_file': 'str', 'backing_file_depth': 'int',
             'encrypted': 'bool', 'encryption_key_missing': 'bool',
+            'detect_zeroes': 'str',
             'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
             'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
             'image': 'ImageInfo',
@@ -4250,6 +4253,20 @@
   'data': [ 'ignore', 'unmap' ] }
 
 ##
+# @BlockdevDetectZeroesOptions
+#
+# Selects the operation mode for zero write detection.
+#
+# @off:      Disabled
+# @on:       Enabled
+# @unmap:    Enabled and even try to unmap blocks if possible
+#
+# Since: 2.1
+##
+{ 'enum': 'BlockdevDetectZeroesOptions',
+  'data': [ 'off', 'on', 'unmap' ] }
+
+##
 # @BlockdevAioOptions
 #
 # Selects the AIO backend to handle I/O requests
@@ -4301,20 +4318,22 @@
 # Options that are available for all block devices, independent of the block
 # driver.
 #
-# @driver:      block driver name
-# @id:          #optional id by which the new block device can be referred to.
-#               This is a required option on the top level of blockdev-add, and
-#               currently not allowed on any other level.
-# @node-name:   #optional the name of a block driver state node (Since 2.0)
-# @discard:     #optional discard-related options (default: ignore)
-# @cache:       #optional cache-related options
-# @aio:         #optional AIO backend (default: threads)
-# @rerror:      #optional how to handle read errors on the device
-#               (default: report)
-# @werror:      #optional how to handle write errors on the device
-#               (default: enospc)
-# @read-only:   #optional whether the block device should be read-only
-#               (default: false)
+# @driver:        block driver name
+# @id:            #optional id by which the new block device can be referred 
to.
+#                 This is a required option on the top level of blockdev-add, 
and
+#                 currently not allowed on any other level.
+# @node-name:     #optional the name of a block driver state node (Since 2.0)
+# @discard:       #optional discard-related options (default: ignore)
+# @cache:         #optional cache-related options
+# @aio:           #optional AIO backend (default: threads)
+# @rerror:        #optional how to handle read errors on the device
+#                 (default: report)
+# @werror:        #optional how to handle write errors on the device
+#                 (default: enospc)
+# @read-only:     #optional whether the block device should be read-only
+#                 (default: false)
+# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
+#                 (default: off)
 #
 # Since: 1.7
 ##
@@ -4327,7 +4346,8 @@
             '*aio': 'BlockdevAioOptions',
             '*rerror': 'BlockdevOnError',
             '*werror': 'BlockdevOnError',
-            '*read-only': 'bool' } }
+            '*read-only': 'bool',
+            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
 
 ##
 # @BlockdevOptionsFile
diff --git a/qemu-options.hx b/qemu-options.hx
index 781af14..5ee94ea 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
     "       
[,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
+    "       [,detect-zeroes=on|off|unmap]\n"
     "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write 
attempts will fail.
 @item address@hidden
 @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
 file sectors into the image file.
address@hidden address@hidden
address@hidden is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. If "unmap" is chosen and @var{discard} is "on"
+a zero write may even be converted to an UNMAP operation.
 @end table
 
 By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ed3ab92..a535955 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2032,6 +2032,8 @@ Each json-object contain the following:
          - "iops_rd_max":  read I/O operations max (json-int)
          - "iops_wr_max":  write I/O operations max (json-int)
          - "iops_size": I/O size when limiting by iops (json-int)
+         - "detect_zeroes": detect and optimize zero writes (json-string)
+             - Possible values: "off", "on", "unmap"
          - "image": the detail of the image, it is a json-object containing
             the following:
              - "filename": image file name (json-string)
@@ -2108,6 +2110,7 @@ Example:
                "iops_rd_max": 0,
                "iops_wr_max": 0,
                "iops_size": 0,
+               "detect_zeroes": "on",
                "image":{
                   "filename":"disks/test.qcow2",
                   "format":"qcow2",
diff --git a/util/iov.c b/util/iov.c
index 6569b5a..f8c49a1 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
     qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
 }
 
+/*
+ *  check if the contents of the iovecs are all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+    int i;
+    for (i = 0; i < qiov->niov; i++) {
+        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
+        uint8_t *ptr = qiov->iov[i].iov_base;
+        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+            return false;
+        }
+        for (; offs < qiov->iov[i].iov_len; offs++) {
+            if (ptr[offs]) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 void qemu_iovec_destroy(QEMUIOVector *qiov)
 {
     assert(qiov->nalloc != -1);
-- 
1.7.9.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]