[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v8 05/36] raw-posix: Add image locking support
From: |
Fam Zheng |
Subject: |
[Qemu-devel] [PATCH v8 05/36] raw-posix: Add image locking support |
Date: |
Fri, 30 Sep 2016 20:09:35 +0800 |
virtlockd in libvirt locks the first byte, we lock byte 1 to avoid
the intervene.
Both file and host device protocols are covered.
The complication is with reopen. We have three different locking states,
namely "unlocked", "shared locked" and "exclusively locked".
When we reopen, the new fd may need a new locking mode. Moving away to or from
exclusive is a bit tricky because we cannot do it atomically. This patch solves
it by dup() s->fd to s->lock_fd and avoid close(), so that there isn't a racy
window where we drop the lock on one fd before acquiring the exclusive lock on
the other.
To make the logic easier to manage, and allow better reuse, the code is
internally organized by state transition table (old_lock -> new_lock).
Signed-off-by: Fam Zheng <address@hidden>
---
block/raw-posix.c | 318 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 317 insertions(+), 1 deletion(-)
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6ed7547..22de242 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -133,6 +133,7 @@ do { \
typedef struct BDRVRawState {
int fd;
+ int lock_fd;
int type;
int open_flags;
size_t buf_align;
@@ -149,6 +150,7 @@ typedef struct BDRVRawState {
typedef struct BDRVRawReopenState {
int fd;
+ int lock_fd;
int open_flags;
} BDRVRawReopenState;
@@ -367,6 +369,43 @@ static void raw_parse_flags(int bdrv_flags, int
*open_flags)
}
}
+static int raw_lock_fd(int fd, ImageLockMode mode)
+{
+ assert(fd >= 0);
+ /* Locking byte 1 avoids interfereing with virtlockd. */
+ switch (mode) {
+ case IMAGE_LOCK_MODE_EXCLUSIVE:
+ return qemu_lock_fd(fd, 1, 1, true);
+ case IMAGE_LOCK_MODE_SHARED:
+ return qemu_lock_fd(fd, 1, 1, false);
+ case IMAGE_LOCK_MODE_NOLOCK:
+ return qemu_unlock_fd(fd, 1, 1);
+ default:
+ abort();
+ }
+}
+
+static int raw_lockf(BlockDriverState *bs, ImageLockMode mode)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (s->lock_fd < 0) {
+ if (mode == IMAGE_LOCK_MODE_NOLOCK) {
+ return 0;
+ }
+ s->lock_fd = qemu_dup(s->fd);
+ if (s->lock_fd < 0) {
+ return s->lock_fd;
+ }
+ }
+ if (mode == IMAGE_LOCK_MODE_AUTO) {
+ mode = bdrv_get_flags(bs) & BDRV_O_RDWR ?
+ IMAGE_LOCK_MODE_EXCLUSIVE :
+ IMAGE_LOCK_MODE_SHARED;
+ }
+ return raw_lock_fd(s->lock_fd, mode);
+}
+
#ifdef CONFIG_LINUX_AIO
static bool raw_use_aio(int bdrv_flags)
{
@@ -433,6 +472,7 @@ static int raw_open_common(BlockDriverState *bs, QDict
*options,
raw_parse_flags(bdrv_flags, &s->open_flags);
s->fd = -1;
+ s->lock_fd = -1;
fd = qemu_open(filename, s->open_flags, 0644);
if (fd < 0) {
ret = -errno;
@@ -529,6 +569,268 @@ static int raw_open(BlockDriverState *bs, QDict *options,
int flags,
return raw_open_common(bs, options, flags, 0, errp);
}
+typedef enum {
+ RAW_REOPEN_PREPARE,
+ RAW_REOPEN_COMMIT,
+ RAW_REOPEN_ABORT
+} RawReopenOperation;
+
+typedef int (*RawReopenFunc)(BDRVReopenState *state,
+ RawReopenOperation op,
+ ImageLockMode old_lock,
+ ImageLockMode new_lock,
+ Error **errp);
+
+static int
+raw_reopen_identical(BDRVReopenState *state,
+ RawReopenOperation op,
+ ImageLockMode old_lock,
+ ImageLockMode new_lock,
+ Error **errp)
+{
+ assert(old_lock == new_lock);
+ return 0;
+}
+
+static int
+raw_reopen_from_unlock(BDRVReopenState *state,
+ RawReopenOperation op,
+ ImageLockMode old_lock,
+ ImageLockMode new_lock,
+ Error **errp)
+{
+ BDRVRawReopenState *raw_s = state->opaque;
+ int ret = 0;
+
+ assert(old_lock != new_lock);
+ assert(old_lock == IMAGE_LOCK_MODE_NOLOCK);
+ switch (op) {
+ case RAW_REOPEN_PREPARE:
+ ret = raw_lock_fd(raw_s->lock_fd, new_lock);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to lock new fd %d",
raw_s->lock_fd);
+ }
+ break;
+ case RAW_REOPEN_COMMIT:
+ case RAW_REOPEN_ABORT:
+ break;
+ }
+
+ return ret;
+}
+
+static int
+raw_reopen_to_unlock(BDRVReopenState *state,
+ RawReopenOperation op,
+ ImageLockMode old_lock,
+ ImageLockMode new_lock,
+ Error **errp)
+{
+ BDRVRawState *s = state->bs->opaque;
+ int ret = 0;
+
+ assert(old_lock != new_lock);
+ assert(new_lock == IMAGE_LOCK_MODE_NOLOCK);
+ switch (op) {
+ case RAW_REOPEN_PREPARE:
+ break;
+ case RAW_REOPEN_COMMIT:
+ if (s->lock_fd >= 0) {
+ qemu_close(s->lock_fd);
+ s->lock_fd = -1;
+ }
+ break;
+ case RAW_REOPEN_ABORT:
+ break;
+ }
+
+ return ret;
+}
+
+static int
+raw_reopen_upgrade(BDRVReopenState *state,
+ RawReopenOperation op,
+ ImageLockMode old_lock,
+ ImageLockMode new_lock,
+ Error **errp)
+{
+ BDRVRawReopenState *raw_s = state->opaque;
+ BDRVRawState *s = state->bs->opaque;
+ int ret = 0, ret2;
+
+ assert(old_lock == IMAGE_LOCK_MODE_SHARED);
+ assert(new_lock == IMAGE_LOCK_MODE_EXCLUSIVE);
+ switch (op) {
+ case RAW_REOPEN_PREPARE:
+ ret = raw_lock_fd(raw_s->lock_fd, IMAGE_LOCK_MODE_SHARED);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to lock new fd (shared)");
+ break;
+ }
+ ret = raw_lock_fd(s->lock_fd, IMAGE_LOCK_MODE_NOLOCK);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to unlock old fd");
+ goto restore;
+ }
+ ret = raw_lock_fd(raw_s->lock_fd, IMAGE_LOCK_MODE_EXCLUSIVE);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to lock new fd (exclusive)");
+ goto restore;
+ }
+ break;
+ case RAW_REOPEN_COMMIT:
+ break;
+ case RAW_REOPEN_ABORT:
+ raw_lock_fd(raw_s->lock_fd, IMAGE_LOCK_MODE_SHARED);
+ ret = raw_lock_fd(s->lock_fd, IMAGE_LOCK_MODE_SHARED);
+ if (ret) {
+ error_report("Failed to restore lock on old fd");
+ }
+ break;
+ }
+
+ return ret;
+restore:
+ ret2 = raw_lock_fd(s->lock_fd, IMAGE_LOCK_MODE_SHARED);
+ if (ret2) {
+ error_report("Failed to restore old lock");
+ }
+ return ret;
+}
+
+static int
+raw_reopen_downgrade(BDRVReopenState *state,
+ RawReopenOperation op,
+ ImageLockMode old_lock,
+ ImageLockMode new_lock,
+ Error **errp)
+{
+ BDRVRawReopenState *raw_s = state->opaque;
+ BDRVRawState *s = state->bs->opaque;
+ int ret = 0;
+
+ assert(old_lock == IMAGE_LOCK_MODE_EXCLUSIVE);
+ assert(new_lock == IMAGE_LOCK_MODE_SHARED);
+ switch (op) {
+ case RAW_REOPEN_PREPARE:
+ break;
+ case RAW_REOPEN_COMMIT:
+ ret = raw_lock_fd(s->lock_fd, IMAGE_LOCK_MODE_SHARED);
+ if (ret) {
+ error_report("Failed to downgrade old lock");
+ break;
+ }
+ ret = raw_lock_fd(raw_s->lock_fd, IMAGE_LOCK_MODE_SHARED);
+ if (ret) {
+ error_report("Failed to lock new fd (shared)");
+ break;
+ }
+ break;
+ case RAW_REOPEN_ABORT:
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * Transactionally moving between three possible locking states is tricky and
+ * must be done carefully. That is mostly because downgrading an exclusive lock
+ * to shared or unlocked is not guaranteed to be revertable. As a result, in
+ * such cases we have to defer the downgraing to "commit", given that no revert
+ * will happen after that point, and that downgrading a lock should never fail.
+ *
+ * On the other hand, upgrading a lock (e.g. from unlocked or shared to
+ * exclusive lock) must happen in "prepare" because it may fail.
+ *
+ * Manage the operation matrix with this state transition table to make
+ * fulfulling above conditions easier.
+ */
+static const struct RawReopenFuncRecord {
+ ImageLockMode old_lock;
+ ImageLockMode new_lock;
+ RawReopenFunc func;
+ bool need_lock_fd;
+} reopen_functions[] = {
+ {IMAGE_LOCK_MODE_NOLOCK, IMAGE_LOCK_MODE_NOLOCK, raw_reopen_identical,
false},
+ {IMAGE_LOCK_MODE_NOLOCK, IMAGE_LOCK_MODE_SHARED, raw_reopen_from_unlock,
true},
+ {IMAGE_LOCK_MODE_NOLOCK, IMAGE_LOCK_MODE_EXCLUSIVE,
raw_reopen_from_unlock, true},
+ {IMAGE_LOCK_MODE_SHARED, IMAGE_LOCK_MODE_NOLOCK, raw_reopen_to_unlock,
false},
+ {IMAGE_LOCK_MODE_SHARED, IMAGE_LOCK_MODE_SHARED, raw_reopen_identical,
false},
+ {IMAGE_LOCK_MODE_SHARED, IMAGE_LOCK_MODE_EXCLUSIVE, raw_reopen_upgrade,
true},
+ {IMAGE_LOCK_MODE_EXCLUSIVE, IMAGE_LOCK_MODE_NOLOCK, raw_reopen_to_unlock,
false},
+ {IMAGE_LOCK_MODE_EXCLUSIVE, IMAGE_LOCK_MODE_SHARED, raw_reopen_downgrade,
true},
+ {IMAGE_LOCK_MODE_EXCLUSIVE, IMAGE_LOCK_MODE_EXCLUSIVE,
raw_reopen_identical, false},
+};
+
+static int raw_reopen_handle_lock(BDRVReopenState *state,
+ RawReopenOperation op,
+ Error **errp)
+{
+ BDRVRawReopenState *raw_s = state->opaque;
+ BDRVRawState *s = state->bs->opaque;
+ ImageLockMode old_lock, new_lock;
+ const struct RawReopenFuncRecord *rec;
+ int ret;
+
+ old_lock = bdrv_get_lock_mode(state->bs);
+ new_lock = bdrv_lock_mode_from_flags(state->flags);
+
+ if (old_lock == IMAGE_LOCK_MODE__MAX) {
+ /* bs was not locked, leave it unlocked. */
+ old_lock = new_lock = IMAGE_LOCK_MODE_NOLOCK;
+ }
+
+ if (old_lock == IMAGE_LOCK_MODE_AUTO) {
+ old_lock = bdrv_get_flags(state->bs) & BDRV_O_RDWR ?
+ IMAGE_LOCK_MODE_EXCLUSIVE : IMAGE_LOCK_MODE_SHARED;
+ }
+
+ if (new_lock == IMAGE_LOCK_MODE_AUTO) {
+ new_lock = state->flags & BDRV_O_RDWR ?
+ IMAGE_LOCK_MODE_EXCLUSIVE : IMAGE_LOCK_MODE_SHARED;
+ }
+
+ for (rec = &reopen_functions[0];
+ rec < &reopen_functions[ARRAY_SIZE(reopen_functions)];
+ rec++) {
+ if (rec->old_lock == old_lock && rec->new_lock == new_lock) {
+ break;
+ }
+ }
+ assert(rec != &reopen_functions[ARRAY_SIZE(reopen_functions)]);
+
+ switch (op) {
+ case RAW_REOPEN_PREPARE:
+ if (rec->need_lock_fd) {
+ ret = qemu_dup(raw_s->fd);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to dup new fd");
+ return ret;
+ }
+ raw_s->lock_fd = ret;
+ }
+ return rec->func(state, op, old_lock, new_lock, errp);
+ case RAW_REOPEN_COMMIT:
+ rec->func(state, op, old_lock, new_lock, errp);
+ if (rec->need_lock_fd) {
+ if (s->lock_fd >= 0) {
+ qemu_close(s->lock_fd);
+ }
+ s->lock_fd = raw_s->lock_fd;
+ }
+ break;
+ case RAW_REOPEN_ABORT:
+ rec->func(state, op, old_lock, new_lock, errp);
+ if (rec->need_lock_fd && raw_s->lock_fd >= 0) {
+ qemu_close(raw_s->lock_fd);
+ raw_s->lock_fd = -1;
+ }
+ break;
+ }
+ return 0;
+}
+
static int raw_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
@@ -607,6 +909,10 @@ static int raw_reopen_prepare(BDRVReopenState *state,
}
}
+ if (!ret) {
+ ret = raw_reopen_handle_lock(state, RAW_REOPEN_PREPARE, errp);
+ }
+
return ret;
}
@@ -617,6 +923,8 @@ static void raw_reopen_commit(BDRVReopenState *state)
s->open_flags = raw_s->open_flags;
+ raw_reopen_handle_lock(state, RAW_REOPEN_COMMIT, NULL);
+
qemu_close(s->fd);
s->fd = raw_s->fd;
@@ -634,6 +942,8 @@ static void raw_reopen_abort(BDRVReopenState *state)
return;
}
+ raw_reopen_handle_lock(state, RAW_REOPEN_ABORT, NULL);
+
if (raw_s->fd >= 0) {
qemu_close(raw_s->fd);
raw_s->fd = -1;
@@ -1321,6 +1631,10 @@ static void raw_close(BlockDriverState *bs)
qemu_close(s->fd);
s->fd = -1;
}
+ if (s->lock_fd >= 0) {
+ qemu_close(s->lock_fd);
+ s->lock_fd = -1;
+ }
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -1874,7 +2188,7 @@ BlockDriver bdrv_file = {
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
-
+ .bdrv_lockf = raw_lockf,
.create_opts = &raw_create_opts,
};
@@ -2324,6 +2638,8 @@ static BlockDriver bdrv_host_device = {
#ifdef __linux__
.bdrv_aio_ioctl = hdev_aio_ioctl,
#endif
+
+ .bdrv_lockf = raw_lockf,
};
#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
--
2.7.4
- [Qemu-devel] [PATCH v8 00/36] block: Image locking series, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 01/36] block: Add flag bits for image locking, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 02/36] qapi: Add ImageLockMode, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 03/36] block: Introduce image file locking, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 04/36] osdep: Add qemu_lock_fd and qemu_unlock_fd, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 06/36] qemu-io: Add "-L" option for BDRV_O_NO_LOCK, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 05/36] raw-posix: Add image locking support,
Fam Zheng <=
- [Qemu-devel] [PATCH v8 08/36] qemu-img: Update documentation of "-L" option, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 07/36] qemu-img: Add "-L" option to sub commands, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 10/36] block: Don't lock drive-backup target image in none mode, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 09/36] qemu-nbd: Add "--no-lock/-L" option, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 11/36] block: Add blk_lock_image, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 12/36] virtio-blk: Apply lock-mode when realize, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 13/36] scsi-disk: Apply lock-mode when realize, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 14/36] scsi-generic: Apply lock-mode when realize, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 15/36] qdev: Add "lock-mode" to block device options, Fam Zheng, 2016/09/30
- [Qemu-devel] [PATCH v8 16/36] ide: Apply lock-mode when initialize, Fam Zheng, 2016/09/30