qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC][PATCH v2] block: add write threshold reporting for bl


From: Francesco Romani
Subject: [Qemu-devel] [RFC][PATCH v2] block: add write threshold reporting for block devices
Date: Fri, 7 Nov 2014 14:12:13 +0100

Managing applications, like oVirt (http://www.ovirt.org), make extensive
use of thin-provisioned disk images.
To let the guest run smoothly and not be paused unnecessarily, oVirt sets
a disk usage threshold (so-called 'high water mark') based on the occupation
of the device, and automatically extends the image once the threshold
is reached or exceeded.

In order to detect the crossing of the threshold, oVirt has no choice but
to aggressively poll the QEMU monitor using the query-blockstats command.
This leads to unnecessary system load, and is made even worse at scale:
deployments with hundreds of VMs are no longer rare.

To fix this, this patch adds:
* A new monitor command to set a mark for a given block device.
* A new event to report if a block device usage exceeds the threshold.

This will allow the managing application to drop the polling
altogether and just wait for a watermark crossing event.

Signed-off-by: Francesco Romani <address@hidden>
---
 block/Makefile.objs             |   1 +
 block/qapi.c                    |   3 +
 block/usage-threshold.c         | 124 ++++++++++++++++++++++++++++++++++++++++
 include/block/block_int.h       |   4 ++
 include/block/usage-threshold.h |  39 +++++++++++++
 qapi/block-core.json            |  46 ++++++++++++++-
 qmp-commands.hx                 |  26 +++++++++
 7 files changed, 242 insertions(+), 1 deletion(-)
 create mode 100644 block/usage-threshold.c
 create mode 100644 include/block/usage-threshold.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 04b0e43..43e381d 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -20,6 +20,7 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o
+block-obj-y += usage-threshold.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/qapi.c b/block/qapi.c
index 1301144..3bb0bc7 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -24,6 +24,7 @@
 
 #include "block/qapi.h"
 #include "block/block_int.h"
+#include "block/usage-threshold.h"
 #include "qmp-commands.h"
 #include "qapi-visit.h"
 #include "qapi/qmp-output-visitor.h"
@@ -315,6 +316,8 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo 
**p_info,
         }
     }
 
+    info->write_threshold = bdrv_get_usage_threshold(bs);
+
     *p_info = info;
     return;
 
diff --git a/block/usage-threshold.c b/block/usage-threshold.c
new file mode 100644
index 0000000..31a587d
--- /dev/null
+++ b/block/usage-threshold.c
@@ -0,0 +1,124 @@
+/*
+ * QEMU System Emulator block usage threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ *  Francesco Romani <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "block/block_int.h"
+#include "block/coroutine.h"
+#include "block/usage-threshold.h"
+#include "qemu/notify.h"
+#include "qapi-event.h"
+#include "qmp-commands.h"
+
+
+int64_t bdrv_get_usage_threshold(const BlockDriverState *bs)
+{
+    if (bs == NULL) {
+        return 0;
+    }
+    if (bs->file) {
+        return bs->file->wr_offset_threshold;
+    }
+    return bs->wr_offset_threshold;
+}
+
+static void usage_threshold_disable(BlockDriverState *bs)
+{
+    notifier_with_return_remove(&bs->wr_usage_threshold_notifier);
+    bs->wr_offset_threshold = 0;
+}
+
+static int usage_threshold_is_set(const BlockDriverState *bs)
+{
+    return !!(bs->wr_offset_threshold > 0);
+}
+
+static int64_t usage_threshold_exceeded(const BlockDriverState *bs,
+                                        const BdrvTrackedRequest *req)
+{
+    if (usage_threshold_is_set(bs)) {
+        int64_t amount = req->offset + req->bytes - bs->wr_offset_threshold;
+        if (amount > 0) {
+            return amount;
+        }
+    }
+    return 0;
+}
+
+static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
+                                            void *opaque)
+{
+    BdrvTrackedRequest *req = opaque;
+    BlockDriverState *bs = req->bs;
+    int64_t amount = 0;
+
+    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+    amount = usage_threshold_exceeded(bs, req);
+    if (amount > 0) {
+        qapi_event_send_block_usage_threshold(
+            bdrv_get_device_name(bs), /* FIXME: this does not work */
+            amount,
+            bs->wr_offset_threshold,
+            &error_abort);
+
+        /* auto-disable to avoid flooding the monitor */
+        usage_threshold_disable(bs);
+    }
+
+    return 0; /* should always let other notifiers run */
+}
+
+static void usage_threshold_register_notifier(BlockDriverState *bs)
+{
+    bs->wr_usage_threshold_notifier.notify = before_write_notify;
+    notifier_with_return_list_add(&bs->before_write_notifiers,
+                                  &bs->wr_usage_threshold_notifier);
+}
+
+void bdrv_set_usage_threshold(BlockDriverState *bs, int64_t threshold_bytes)
+{
+    BlockDriverState *target_bs = bs;
+    if (bs->file) {
+        target_bs = bs->file;
+    }
+
+    if (usage_threshold_is_set(target_bs)) {
+        if (threshold_bytes > 0) { /* update */
+            target_bs->wr_offset_threshold = threshold_bytes;
+        } else {
+            usage_threshold_disable(target_bs);
+        }
+    } else if (threshold_bytes > 0) { /* update only if meaningful */
+        usage_threshold_register_notifier(target_bs);
+        target_bs->wr_offset_threshold = threshold_bytes;
+    }
+}
+
+void qmp_block_set_threshold(const char *device, int64_t threshold_bytes,
+                             Error **errp)
+{
+    BlockDriverState *bs;
+    AioContext *aio_context;
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    bdrv_set_usage_threshold(bs, threshold_bytes);
+
+    aio_context_release(aio_context);
+}
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a1c17b9..f6b26f3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -409,6 +409,10 @@ struct BlockDriverState {
 
     /* The error object in use for blocking operations on backing_hd */
     Error *backing_blocker;
+
+    /* threshold limit for writes, in bytes. "High water mark". */
+    int64_t wr_offset_threshold;
+    NotifierWithReturn wr_usage_threshold_notifier;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/include/block/usage-threshold.h b/include/block/usage-threshold.h
new file mode 100644
index 0000000..96b8274
--- /dev/null
+++ b/include/block/usage-threshold.h
@@ -0,0 +1,39 @@
+/*
+ * QEMU System Emulator block usage threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ *  Francesco Romani <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef BLOCK_USAGE_THRESHOLD_H
+#define BLOCK_USAGE_THRESHOLD_H
+
+#include <stdint.h>
+
+#include "qemu/typedefs.h"
+
+/*
+ * bdrv_set_usage_threshold:
+ *
+ * Set the usage threshold for block devices, in bytes.
+ * Notify when a write exceeds the threshold, meaning the device
+ * is becoming full, so it can be transparently resized.
+ * To be used with thin-provisioned block devices.
+ *
+ * Use threshold_bytes == 0 to disable.
+ */
+void bdrv_set_usage_threshold(BlockDriverState *bs, int64_t threshold_bytes);
+
+/*
+ * bdrv_get_usage_threshold
+ *
+ * Get the configured usage threshold, in bytes.
+ * Zero means no threshold configured.
+ */
+int64_t bdrv_get_usage_threshold(const BlockDriverState *bs);
+
+#endif
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 77a0cfb..3441e41 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -345,13 +345,17 @@
 # @inserted: #optional @BlockDeviceInfo describing the device if media is
 #            present
 #
+# @write-threshold: configured write threshold for the device.
+#                   0 if disabled. (Since 2.2)
+#
 # Since:  0.14.0
 ##
 { 'type': 'BlockInfo',
   'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
            'locked': 'bool', '*inserted': 'BlockDeviceInfo',
            '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
-           '*dirty-bitmaps': ['BlockDirtyInfo'] } }
+           '*dirty-bitmaps': ['BlockDirtyInfo'],
+           'write-threshold': 'int' } }
 
 ##
 # @query-block:
@@ -1827,3 +1831,43 @@
 ##
 { 'enum': 'PreallocMode',
   'data': [ 'off', 'metadata', 'falloc', 'full' ] }
+
+##
+# @BLOCK_USAGE_THRESHOLD
+#
+# Emitted when a write on a block device reaches or exceeds the
+# configured threshold. For thin-provisioned devices, this
+# means the device should be extended to avoid pausing for
+# disk exhaustion.
+#
+# @device: device name
+#
+# @amount-exceeded: amount of data which exceeded the threshold, in bytes.
+#
+# @threshold: last configured threshold, in bytes.
+#
+# Since: 2.3
+##
+{ 'event': 'BLOCK_USAGE_THRESHOLD',
+  'data': { 'device': 'str', 'amount-exceeded': 'int', 'threshold': 'int' } }
+
+##
+# @block-set-threshold
+#
+# Change usage threshold for a block drive. An event will be delivered
+# if a write to this block drive crosses the configured threshold.
+# This is useful to transparently resize thin-provisioned drives without
+# the guest OS noticing.
+#
+# @device: The name of the device
+#
+# @threshold: configured threshold for the block device, in bytes.
+#             Use 0 to disable the threshold.
+#
+# Returns: Nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since: 2.3
+##
+{ 'command': 'block-set-threshold',
+  'data': { 'device': 'str', 'threshold': 'int' } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 1abd619..93b0cb9 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3852,3 +3852,29 @@ Move mouse pointer to absolute coordinates (20000, 400).
 <- { "return": {} }
 
 EQMP
+
+    {
+        .name       = "block-set-threshold",
+        .args_type  = "device:B,threshold:l",
+        .mhandler.cmd_new = qmp_marshal_input_block_set_threshold,
+    },
+
+SQMP
+block-set-threshold
+-------------------
+
+Change the write threshold for a block drive.
+
+Arguments:
+
+- "device": device name (json-string)
+- "threshold": the write threshold in bytes (json-int)
+
+Example:
+
+-> { "execute": "block-set-threshold",
+  "arguments": { "device": "drive-virtio-disk0",
+                 "threshold": 17179869184 } }
+<- { "return": {} }
+
+EQMP
-- 
1.9.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]