>From 5a16397f0e950f9b17241b49e65eabd716e1774d Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Mon, 7 Dec 2009 13:58:52 +0000 Subject: [PATCH] Disk image shared and exclusive locks. Allow qemu to acquire shared and exclusive locks on disk images. This is done by extending the -drive option with two additional, optional parameters: -drive [...],lock=MODE,backinglock=MODE where MODE is "none", "shared" or "exclusive". lock=none is the default, and it means that we don't try to acquire any sort of lock. lock=shared tries to acquire a shared lock on the disk image. Multiple instances of qemu may all hold this sort of lock. lock=exclusive tries to acquire an exclusive lock on the disk image. An exclusive lock excludes all other shared and exclusive locks. backinglock=none|shared|exclusive is the same, but it applies to the first level (only) of backing disk for formats like qcow2 which support backing disks. If acquisition of a lock fails, opening the image fails. The implementation of locks only works for raw POSIX and Win32 files. However many of the other block types are implemented in terms of these drivers, so they "inherit" locking too. Other drivers are read-only, so don't require locking. Below we note only the cases where locking is *not* implemented: cloop - directly open()s the file, no locking implemented cow - same as cloop curl - protocol probably doesn't support locking nbd - same as curl --- block.c | 10 ++++++++-- block.h | 7 +++++++ block/raw-posix.c | 16 ++++++++++++++++ block/raw-win32.c | 17 +++++++++++++++++ qemu-config.c | 8 ++++++++ qemu-options.hx | 14 ++++++++++++++ vl.c | 29 +++++++++++++++++++++++++++++ 7 files changed, 99 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 853f025..f80d6a8 100644 --- a/block.c +++ b/block.c @@ -448,7 +448,8 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, try_rw = !bs->read_only || bs->is_temporary; if (!(flags & BDRV_O_FILE)) open_flags = (try_rw ?
BDRV_O_RDWR : 0) | - (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO)); + (flags & (BDRV_O_CACHE_MASK | BDRV_O_NATIVE_AIO | + BDRV_O_LOCK_MASK | BDRV_O_BACKLOCK_MASK)); else open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) @@ -479,14 +480,19 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags, if (bs->backing_file[0] != '\0') { /* if there is a backing file, use it */ BlockDriver *back_drv = NULL; + int back_drv_open_flags = open_flags; bs->backing_hd = bdrv_new(""); /* pass on read_only property to the backing_hd */ bs->backing_hd->read_only = bs->read_only; + /* lock for the first level backing disk only */ + back_drv_open_flags &= ~(BDRV_O_BACKLOCK_MASK | BDRV_O_LOCK_MASK); + back_drv_open_flags |= + (open_flags & BDRV_O_BACKLOCK_MASK) >> BDRV_O_BACKLOCK_TO_LOCK_SHIFT; path_combine(backing_filename, sizeof(backing_filename), filename, bs->backing_file); if (bs->backing_format[0] != '\0') back_drv = bdrv_find_format(bs->backing_format); - ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags, + ret = bdrv_open2(bs->backing_hd, backing_filename, back_drv_open_flags, back_drv); if (ret < 0) { bdrv_close(bs); diff --git a/block.h b/block.h index 4a8b628..3005eca 100644 --- a/block.h +++ b/block.h @@ -38,8 +38,15 @@ typedef struct QEMUSnapshotInfo { #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ #define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ #define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ +#define BDRV_O_LOCK_SHARED 0x0100 /* fail unless we can lock shared */ +#define BDRV_O_LOCK_EXCLUSIVE 0x0200 /* fail unless we can lock exclusive */ +#define BDRV_O_BACKLOCK_SHARED 0x0400 /* same for backing disk */ +#define BDRV_O_BACKLOCK_EXCLUSIVE 0x0800 /* "" */ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB) +#define BDRV_O_LOCK_MASK (BDRV_O_LOCK_SHARED | BDRV_O_LOCK_EXCLUSIVE) +#define BDRV_O_BACKLOCK_MASK 
(BDRV_O_BACKLOCK_SHARED|BDRV_O_BACKLOCK_EXCLUSIVE) +#define BDRV_O_BACKLOCK_TO_LOCK_SHIFT 2 #define BDRV_SECTOR_BITS 9 #define BDRV_SECTOR_SIZE (1 << BDRV_SECTOR_BITS) diff --git a/block/raw-posix.c b/block/raw-posix.c index 5a6a22b..8b11b89 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -133,6 +133,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, { BDRVRawState *s = bs->opaque; int fd, ret; + struct flock lk; s->lseek_err_cnt = 0; @@ -163,6 +164,21 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, s->fd = fd; s->aligned_buf = NULL; + if (bdrv_flags & BDRV_O_LOCK_MASK) { + if (bdrv_flags & BDRV_O_LOCK_SHARED) + lk.l_type = F_RDLCK; + else /* bdrv_flags & BDRV_O_LOCK_EXCLUSIVE */ + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; /* means lock the whole file */ + fprintf (stderr, "acquiring %s lock on %s ...\n", + bdrv_flags & BDRV_O_LOCK_SHARED ? "shared" : "exclusive", + filename); + if (fcntl (fd, F_SETLK, &lk) == -1) + goto out_close; + } + if ((bdrv_flags & BDRV_O_NOCACHE)) { s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE); if (s->aligned_buf == NULL) { diff --git a/block/raw-win32.c b/block/raw-win32.c index 72acad5..9d0cfc7 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -78,6 +78,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) BDRVRawState *s = bs->opaque; int access_flags, create_flags; DWORD overlapped; + DWORD lock_flags; + OVERLAPPED ov; s->type = FTYPE_FILE; @@ -106,6 +108,21 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags) return -EACCES; return -1; } + + if (flags & BDRV_O_LOCK_MASK) { + lock_flags = LOCKFILE_FAIL_IMMEDIATELY; + if (flags & BDRV_O_LOCK_EXCLUSIVE) + lock_flags |= LOCKFILE_EXCLUSIVE_LOCK; + + memset(&ov, 0, sizeof(ov)); + ov.Offset = 0; + ov.OffsetHigh = 0; + + if (!LockFileEx(s->hfile, lock_flags, 0, 1, 0, &ov)) + /* For compatibility with the POSIX lock failure 
... */ + return -EAGAIN; + } + return 0; } diff --git a/qemu-config.c b/qemu-config.c index 92b5363..62641b4 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -77,6 +77,14 @@ QemuOptsList qemu_drive_opts = { },{ .name = "readonly", .type = QEMU_OPT_BOOL, + },{ + .name = "lock", + .type = QEMU_OPT_STRING, + .help = "lock disk image (exclusive, shared, none)", + },{ + .name = "backinglock", + .type = QEMU_OPT_STRING, + .help = "lock backing disk image (exclusive, shared, none)", }, { /* end if list */ } }, diff --git a/qemu-options.hx b/qemu-options.hx index 1b5781a..a458f3c 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -104,6 +104,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive, " [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n" " [,cache=writethrough|writeback|none][,format=f][,serial=s]\n" " [,addr=A][,id=name][,aio=threads|native]\n" + " [,lock=exclusive|shared|none][,backinglock=exclusive|shared|none]\n" " use 'file' as a drive image\n") DEF("set", HAS_ARG, QEMU_OPTION_set, "-set group.id.arg=value\n" @@ -146,6 +147,19 @@ an untrusted format header. This option specifies the serial number to assign to the device. @item addr=@var{addr} Specify the controller's PCI address (if=virtio only). +@item lock=@var{mode} +Acquire a lock on the disk image (@var{file}). +Available modes are: exclusive, shared, none. +The default is "none", meaning we don't try to acquire a lock. To +avoid multiple virtual machines trying to write to a disk at the +same time (which can cause disk corruption), use lock=exclusive. +@item backinglock=@var{mode} +Acquire a lock on the backing disk. +Available modes are: exclusive, shared, none. +The default is "none", meaning we don't try to acquire a lock. For +disk formats that don't have backing disks, this option is ignored. +In the case where multiple levels of backing disk are used, this +only applies to the first ("parent") backing disk. @end table By default, writethrough caching is used for all block device.
This means that diff --git a/vl.c b/vl.c index 09a0ec5..29fff4d 100644 --- a/vl.c +++ b/vl.c @@ -2030,6 +2030,7 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, const char *devaddr; DriveInfo *dinfo; int snapshot = 0; + int lock_flags = 0; *fatal_error = 1; @@ -2220,6 +2221,32 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, } } + if ((buf = qemu_opt_get(opts, "lock")) != NULL) { + if (!strcmp(buf, "none")) + /* nothing */; + else if (!strcmp(buf, "shared")) + lock_flags |= BDRV_O_LOCK_SHARED; + else if (!strcmp(buf, "exclusive")) + lock_flags |= BDRV_O_LOCK_EXCLUSIVE; + else { + fprintf(stderr, "qemu: invalid lock option\n"); + return NULL; + } + } + + if ((buf = qemu_opt_get(opts, "backinglock")) != NULL) { + if (!strcmp(buf, "none")) + /* nothing */; + else if (!strcmp(buf, "shared")) + lock_flags |= BDRV_O_BACKLOCK_SHARED; + else if (!strcmp(buf, "exclusive")) + lock_flags |= BDRV_O_BACKLOCK_EXCLUSIVE; + else { + fprintf(stderr, "qemu: invalid lock option\n"); + return NULL; + } + } + /* compute bus and unit according index */ if (index != -1) { @@ -2364,6 +2391,8 @@ DriveInfo *drive_init(QemuOpts *opts, void *opaque, (void)bdrv_set_read_only(dinfo->bdrv, 1); } + bdrv_flags |= lock_flags; + if (bdrv_open2(dinfo->bdrv, file, bdrv_flags, drv) < 0) { fprintf(stderr, "qemu: could not open disk image %s: %s\n", file, strerror(errno)); -- 1.6.5.2