qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH 05/13] add rcu library


From: Paolo Bonzini
Subject: [Qemu-devel] [RFC PATCH 05/13] add rcu library
Date: Mon, 15 Aug 2011 14:08:32 -0700

This includes a (mangled) copy of the urcu-qsbr code from liburcu.
The main changes are: 1) removing dependencies on many other header files
in liburcu; 2) removing for simplicity the tentative busy waiting in
synchronize_rcu, which has limited performance effects; 3) replacing
futexes in synchronize_rcu with QemuEvents for Win32 portability.
The API is the same as liburcu, so it should be possible in the future
to require liburcu on POSIX systems for example and use our copy only
on Windows.

Among the various versions available I chose urcu-qsbr, which has the
fastest rcu_read_{lock,unlock} but requires the program to manually
annotate quiescent points or intervals.  QEMU threads usually have easily
identified quiescent periods, so this should not be a problem.

Signed-off-by: Paolo Bonzini <address@hidden>
---
 Makefile.objs |    2 +-
 qemu-queue.h  |   11 +++
 rcu-pointer.h |  119 +++++++++++++++++++++++++++++++
 rcu.c         |  221 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 rcu.h         |  135 +++++++++++++++++++++++++++++++++++
 5 files changed, 487 insertions(+), 1 deletions(-)
 create mode 100644 rcu-pointer.h
 create mode 100644 rcu.c
 create mode 100644 rcu.h

diff --git a/Makefile.objs b/Makefile.objs
index 16eef38..902a083 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -6,7 +6,7 @@ qobject-obj-y += qerror.o error.o
 
 #######################################################################
 # oslib-obj-y is code depending on the OS (win32 vs posix)
-oslib-obj-y = osdep.o
+oslib-obj-y = osdep.o rcu.o
 oslib-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o
 oslib-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o
 
diff --git a/qemu-queue.h b/qemu-queue.h
index 1d07745..cc8e55b 100644
--- a/qemu-queue.h
+++ b/qemu-queue.h
@@ -100,6 +100,17 @@ struct {                                                   
             \
         (head)->lh_first = NULL;                                        \
 } while (/*CONSTCOND*/0)
 
+#define QLIST_SWAP(dstlist, srclist, field) do {                        \
+        void *tmplist;                                                  \
+        tmplist = (srclist)->lh_first;                                  \
+        (srclist)->lh_first = (dstlist)->lh_first;                      \
+        if ((srclist)->lh_first != NULL)                                \
+            (srclist)->lh_first->field.le_prev = &(srclist)->lh_first;  \
+        (dstlist)->lh_first = tmplist; \
+        if ((dstlist)->lh_first != NULL)                                \
+            (dstlist)->lh_first->field.le_prev = &(dstlist)->lh_first;  \
+} while (/*CONSTCOND*/0)
+
 #define QLIST_INSERT_AFTER(listelm, elm, field) do {                    \
         if (((elm)->field.le_next = (listelm)->field.le_next) != NULL)  \
                 (listelm)->field.le_next->field.le_prev =               \
diff --git a/rcu-pointer.h b/rcu-pointer.h
new file mode 100644
index 0000000..4381313
--- /dev/null
+++ b/rcu-pointer.h
@@ -0,0 +1,119 @@
+#ifndef _URCU_POINTER_STATIC_H
+#define _URCU_POINTER_STATIC_H
+
+/*
+ * urcu-pointer-static.h
+ *
+ * Userspace RCU header. Operations on pointers.
+ *
+ * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu-pointer.h for
+ * linking dynamically with the userspace rcu library.
+ *
+ * Copyright (c) 2009 Mathieu Desnoyers <address@hidden>
+ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif 
+
+#ifdef __alpha__
+#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
+#else
+#define smp_read_barrier_depends()
+#endif
+
+/**
+ * rcu_dereference - reads (copy) a RCU-protected pointer to a local variable
+ * into a RCU read-side critical section. The pointer can later be safely
+ * dereferenced within the critical section.
+ *
+ * This ensures that the pointer copy is invariant thorough the whole critical
+ * section.
+ *
+ * Inserts memory barriers on architectures that require them (currently only
+ * Alpha) and documents which pointers are protected by RCU.
+ *
+ * The compiler memory barrier in ACCESS_ONCE() ensures that value-speculative
+ * optimizations (e.g. VSS: Value Speculation Scheduling) does not perform the
+ * data read before the pointer read by speculating the value of the pointer.
+ * Correct ordering is ensured because the pointer is read as a volatile 
access.
+ * This acts as a global side-effect operation, which forbids reordering of
+ * dependent memory operations. Note that such concern about 
dependency-breaking
+ * optimizations will eventually be taken care of by the "memory_order_consume"
+ * addition to forthcoming C++ standard.
+ *
+ * Should match rcu_assign_pointer() or rcu_xchg_pointer().
+ */
+
+#define rcu_dereference(p)     ({                                      \
+                               typeof(p) _________p1 = ACCESS_ONCE(p); \
+                               smp_read_barrier_depends();             \
+                               (_________p1);                          \
+                               })
+
+/**
+ * rcu_cmpxchg_pointer - same as rcu_assign_pointer, but tests if the pointer
+ * is as expected by "old". If succeeds, returns the previous pointer to the
+ * data structure, which can be safely freed after waiting for a quiescent 
state
+ * using synchronize_rcu(). If fails (unexpected value), returns old (which
+ * should not be freed !).
+ */
+
+#define rcu_cmpxchg_pointer(p, old, _new)                              \
+       ({                                                              \
+               typeof(*p) _________pold = (old);                       \
+               typeof(*p) _________pnew = (_new);                      \
+               if (!__builtin_constant_p(_new) ||                      \
+                   ((_new) != NULL))                                   \
+                       __sync_synchronize();                           \
+               uatomic_cmpxchg(p, _________pold, _________pnew);       \
+       })
+
+#define rcu_set_pointer(p, v)                          \
+       ({                                              \
+               typeof(*p) _________pv = (v);           \
+               if (!__builtin_constant_p(v) ||         \
+                   ((v) != NULL))                      \
+                       __sync_synchronize();           \
+               ACCESS_ONCE(*p) = _________pv;          \
+       })
+
+/**
+ * rcu_assign_pointer - assign (publicize) a pointer to a new data structure
+ * meant to be read by RCU read-side critical sections. Returns the assigned
+ * value.
+ *
+ * Documents which pointers will be dereferenced by RCU read-side critical
+ * sections and adds the required memory barriers on architectures requiring
+ * them. It also makes sure the compiler does not reorder code initializing the
+ * data structure before its publication.
+ *
+ * Should match rcu_dereference_pointer().
+ */
+
+#define rcu_assign_pointer(p, v)       rcu_set_pointer(&(p), v)
+
+#ifdef __cplusplus 
+}
+#endif
+
+#endif /* _URCU_POINTER_STATIC_H */
diff --git a/rcu.c b/rcu.c
new file mode 100644
index 0000000..e2f347a
--- /dev/null
+++ b/rcu.c
@@ -0,0 +1,221 @@
+/*
+ * urcu-qsbr.c
+ *
+ * Userspace RCU QSBR library
+ *
+ * Copyright (c) 2009 Mathieu Desnoyers <address@hidden>
+ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
+ * Copyright 2011 Red Hat, Inc.
+ *
+ * Ported to QEMU by Paolo Bonzini  <address@hidden>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include "rcu.h"
+#include "qemu-barrier.h"
+
+/*
+ * Global grace period counter.  Bit 0 is one if the thread is online.
+ * Bits 1 and above are defined in synchronize_rcu/update_counter_and_wait.
+ */
+#define RCU_GP_ONLINE           (1UL << 0)
+#define RCU_GP_CTR              (1UL << 1)
+
+unsigned long rcu_gp_ctr = RCU_GP_ONLINE;
+
+QemuEvent rcu_gp_event;
+QemuMutex rcu_gp_lock;
+
+/*
+ * Check whether a quiescent state was crossed between the beginning of
+ * update_counter_and_wait and now.
+ */
+static inline int rcu_gp_ongoing(unsigned long *ctr)
+{
+       unsigned long v;
+
+       v = ACCESS_ONCE(*ctr);
+       return v && (v != rcu_gp_ctr);
+}
+
+/*
+ * Written to only by each individual reader. Read by both the reader and the
+ * writers.
+ */
+__thread struct rcu_reader rcu_reader;
+
+typedef QLIST_HEAD(, rcu_reader) ThreadList;
+static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
+
+static void update_counter_and_wait(void)
+{
+       ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
+       struct rcu_reader *index, *tmp;
+
+       if (sizeof (rcu_gp_ctr) <= 4) {
+               /* Switch parity: 0 -> 1, 1 -> 0 */
+               ACCESS_ONCE(rcu_gp_ctr) = rcu_gp_ctr ^ RCU_GP_CTR;
+       } else {
+               /* Increment current grace period.  */
+               ACCESS_ONCE(rcu_gp_ctr) = rcu_gp_ctr + RCU_GP_CTR;
+       }
+
+       barrier();
+
+       /*
+        * Wait for each thread rcu_reader_qs_gp count to become 0.
+        */
+       for (;;) {
+               /*
+                * We want to be notified of changes made to rcu_gp_ongoing
+                * while we walk the list.
+                */
+               qemu_event_reset(&rcu_gp_event);
+               QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
+                       if (!rcu_gp_ongoing(&index->ctr)) {
+                               QLIST_REMOVE(index, node);
+                               QLIST_INSERT_HEAD(&qsreaders, index, node);
+                       }
+               }
+
+               if (QLIST_EMPTY(&registry)) {
+                       break;
+               }
+
+               /*
+                * Wait for one thread to report a quiescent state and
+                * try again.
+                */
+               qemu_event_wait(&rcu_gp_event);
+       }
+       smp_mb();
+
+       /* put back the reader list in the registry */
+       QLIST_SWAP(&registry, &qsreaders, node);
+}
+
+/*
+ * Using a two-subphases algorithm for architectures with smaller than 64-bit
+ * long-size to ensure we do not encounter an overflow bug.
+ */
+
+void synchronize_rcu(void)
+{
+       unsigned long was_online;
+
+       was_online = rcu_reader.ctr;
+
+       /* All threads should read qparity before accessing data structure
+        * where new ptr points to.
+        */
+
+       /*
+        * Mark the writer thread offline to make sure we don't wait for
+        * our own quiescent state. This allows using synchronize_rcu()
+        * in threads registered as readers.
+        *
+        * Also includes a memory barrier.
+        */
+       if (was_online) {
+               rcu_thread_offline();
+       } else {
+               smp_mb();
+       }
+
+       qemu_mutex_lock(&rcu_gp_lock);
+
+       if (QLIST_EMPTY(&registry))
+               goto out;
+
+       if (sizeof(rcu_gp_ctr) <= 4) {
+               /*
+                * Wait for previous parity to be empty of readers.
+                */
+               update_counter_and_wait();      /* 0 -> 1, wait readers in 
parity 0 */
+
+               /*
+                * Must finish waiting for quiescent state for parity 0 before
+                * committing next rcu_gp_ctr update to memory. Failure to
+                * do so could result in the writer waiting forever while new
+                * readers are always accessing data (no progress).  Enforce
+                * compiler-order of load rcu_reader ctr before store to
+                * rcu_gp_ctr.
+                */
+               barrier();
+
+               /*
+                * Adding a memory barrier which is _not_ formally required,
+                * but makes the model easier to understand. It does not have a
+                * big performance impact anyway, given this is the write-side.
+                */
+               smp_mb();
+       }
+
+       /*
+        * Wait for previous parity/grace period to be empty of readers.
+        */
+       update_counter_and_wait();      /* 1 -> 0, wait readers in parity 1 */
+out:
+       qemu_mutex_unlock(&rcu_gp_lock);
+
+       if (was_online) {
+               /* Also includes a memory barrier.  */
+               rcu_thread_online();
+       } else {
+               /*
+                * Finish waiting for reader threads before letting the old
+                * ptr being freed.
+                */
+               smp_mb();
+       }
+}
+
+static void rcu_init(void)
+{
+       qemu_mutex_init(&rcu_gp_lock);
+       qemu_event_init(&rcu_gp_event, true);
+}
+
+void rcu_register_thread(void)
+{
+       static QemuOnce init = QEMU_ONCE_INIT;
+       assert(rcu_reader.ctr == 0);
+
+       qemu_once(&init, rcu_init);
+       qemu_mutex_lock(&rcu_gp_lock);
+       QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+       qemu_mutex_unlock(&rcu_gp_lock);
+       rcu_thread_online();
+}
+
+void rcu_unregister_thread(void)
+{
+       /*
+        * We have to make the thread offline otherwise we end up dealocking
+        * with a waiting writer.
+        */
+       rcu_thread_offline();
+       qemu_mutex_lock(&rcu_gp_lock);
+       QLIST_REMOVE(&rcu_reader, node);
+       qemu_mutex_unlock(&rcu_gp_lock);
+}
diff --git a/rcu.h b/rcu.h
new file mode 100644
index 0000000..569f696
--- /dev/null
+++ b/rcu.h
@@ -0,0 +1,135 @@
+#ifndef _URCU_QSBR_H
+#define _URCU_QSBR_H
+
+/*
+ * urcu-qsbr.h
+ *
+ * Userspace RCU QSBR header.
+ *
+ * LGPL-compatible code should include this header with :
+ *
+ * #define _LGPL_SOURCE
+ * #include <urcu.h>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+c* IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "compiler.h"
+#include "rcu-pointer.h"
+#include "qemu-thread.h"
+#include "qemu-queue.h"
+#include "qemu-barrier.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif 
+
+/*
+ * Important !
+ *
+ * Each thread containing read-side critical sections must be registered
+ * with rcu_register_thread() before calling rcu_read_lock().
+ * rcu_unregister_thread() should be called before the thread exits.
+ */
+
+#ifdef DEBUG_RCU
+#define rcu_assert(args...)    assert(args)
+#else
+#define rcu_assert(args...)
+#endif
+
+#define RCU_GP_ONLINE          (1UL << 0)
+#define RCU_GP_CTR             (1UL << 1)
+
+/*
+ * Global quiescent period counter with low-order bits unused.
+ * Using a int rather than a char to eliminate false register dependencies
+ * causing stalls on some architectures.
+ */
+extern unsigned long rcu_gp_ctr;
+
+extern QemuEvent rcu_gp_event;
+
+struct rcu_reader {
+       /* Data used by both reader and synchronize_rcu() */
+       unsigned long ctr;
+       /* Data used for registry */
+       QLIST_ENTRY(rcu_reader) node;
+};
+
+extern __thread struct rcu_reader rcu_reader;
+
+static inline void rcu_read_lock(void)
+{
+       rcu_assert(rcu_reader.ctr);
+}
+
+static inline void rcu_read_unlock(void)
+{
+}
+
+static inline void rcu_quiescent_state(void)
+{
+       smp_mb();
+       ACCESS_ONCE(rcu_reader.ctr) = ACCESS_ONCE(rcu_gp_ctr);
+#if 0
+       /* write rcu_reader.ctr before read futex.  Included in
+        * qemu_event_set. */
+       smp_mb();
+#endif
+       qemu_event_set(&rcu_gp_event);
+}
+
+static inline void rcu_thread_offline(void)
+{
+       smp_mb();
+       ACCESS_ONCE(rcu_reader.ctr) = 0;
+#if 0
+       /* write rcu_reader.ctr before read futex.  Included in
+        * qemu_event_set. */
+       smp_mb();
+#endif
+       qemu_event_set(&rcu_gp_event);
+}
+
+static inline void rcu_thread_online(void)
+{
+       smp_mb();       /* Ensure the compiler does not reorder us with mutex */
+       ACCESS_ONCE(rcu_reader.ctr) = ACCESS_ONCE(rcu_gp_ctr);
+       smp_mb();
+}
+
+extern void synchronize_rcu(void);
+
+/*
+ * Reader thread registration.
+ */
+extern void rcu_register_thread(void);
+extern void rcu_unregister_thread(void);
+
+#ifdef __cplusplus 
+}
+#endif
+
+#endif /* _URCU_QSBR_H */
-- 
1.7.6





reply via email to

[Prev in Thread] Current Thread [Next in Thread]