Subject: [PATCH] thread lock nesting debugging Date: 14 Nov 2008 To: address@hidden I've been seeing all sorts of deadlocks in guile, and so I wrote a small debugging utility to try to track down the problems (and I'm finding bugs with this already. Right now, this is an FYI patch, as its subject to change. However, I'd like to eventually have it applied permanently to the guile source tree; any suggestions on how it should be organized so that it would be acceptable? Signed-off-by: Linas Vepstas --- config.h.in | 3 + configure.in | 11 ++++ libguile/Makefile.am | 2 libguile/debug-locks.c | 111 +++++++++++++++++++++++++++++++++++++++++++++ libguile/pthread-threads.h | 11 ++++ libguile/threads.c | 31 ++++++++++++ libguile/threads.h | 8 +++ 7 files changed, 176 insertions(+), 1 deletion(-) Index: guile-1.8.5/libguile/pthread-threads.h =================================================================== --- guile-1.8.5.orig/libguile/pthread-threads.h 2008-11-14 14:28:58.000000000 -0600 +++ guile-1.8.5/libguile/pthread-threads.h 2008-11-14 17:51:52.000000000 -0600 @@ -91,6 +91,17 @@ extern pthread_mutexattr_t scm_i_pthread #define scm_i_scm_pthread_cond_wait scm_pthread_cond_wait #define scm_i_scm_pthread_cond_timedwait scm_pthread_cond_timedwait +#ifdef GUILE_DEBUG_LOCKS +#undef scm_i_pthread_mutex_lock +#define scm_i_pthread_mutex_lock(ARG) scm_i_pthread_mutex_lock_dbg(ARG, #ARG) + +#undef scm_i_pthread_mutex_unlock +#define scm_i_pthread_mutex_unlock(ARG) scm_i_pthread_mutex_unlock_dbg(ARG, #ARG) + +int scm_i_pthread_mutex_lock_dbg(pthread_mutex_t *, const char *); +int scm_i_pthread_mutex_unlock_dbg(pthread_mutex_t *, const char *); +#endif + #endif /* SCM_PTHREADS_THREADS_H */ /* Index: guile-1.8.5/libguile/threads.c =================================================================== --- guile-1.8.5.orig/libguile/threads.c 2008-11-14 14:21:18.000000000 -0600 +++ guile-1.8.5/libguile/threads.c 2008-11-14 17:39:25.000000000 -0600 @@ -441,6 +441,19 @@ guilify_self_1 (SCM_STACKITEM *base) SCM_SET_FREELIST_LOC (scm_i_freelist, &t->freelist); SCM_SET_FREELIST_LOC (scm_i_freelist2, &t->freelist2); +#ifdef GUILE_DEBUG_LOCKS + int i, j; + for(i=0; ilockname[i] = NULL; + t->lockmutex[i] = NULL; + for(j=0; jlockholder[i][j] = NULL; + } + } +#endif + scm_i_pthread_setspecific (scm_i_thread_key, t); scm_i_pthread_mutex_lock (&t->heap_mutex); @@ -1721,6 +1734,24 @@ scm_init_threads_default_dynamic_state ( scm_i_default_dynamic_state = scm_permanent_object (state); } +#ifdef GUILE_DEBUG_LOCKS +extern int guile_do_abort_on_badlock; + +void prt_one_lockholder(scm_i_thread *); +void prt_lockholders(void); +void prt_lockholders(void) +{ + scm_i_thread *t; + + if (!guile_do_abort_on_badlock) return; + + for (t = all_threads; t; t = t->next_thread) + { + prt_one_lockholder(t); + } +} +#endif + void scm_init_thread_procs () { Index: guile-1.8.5/libguile/threads.h =================================================================== --- guile-1.8.5.orig/libguile/threads.h 2008-11-14 14:17:20.000000000 -0600 +++ guile-1.8.5/libguile/threads.h 2008-11-14 17:20:12.000000000 -0600 @@ -108,6 +108,14 @@ typedef struct scm_i_thread { SCM_STACKITEM *top; jmp_buf regs; +#ifdef GUILE_DEBUG_LOCKS +#define LOCK_STACK_DEPTH 8 +#define TRACE_STACK_DEPTH 4 + const char *lockname[LOCK_STACK_DEPTH]; + char *lockholder[LOCK_STACK_DEPTH][TRACE_STACK_DEPTH]; + pthread_mutex_t *lockmutex[LOCK_STACK_DEPTH]; +#endif + } scm_i_thread; #define SCM_I_IS_THREAD(x) SCM_SMOB_PREDICATE (scm_tc16_thread, x) Index: guile-1.8.5/configure.in =================================================================== --- guile-1.8.5.orig/configure.in 2008-11-14 15:01:15.000000000 -0600 +++ guile-1.8.5/configure.in 2008-11-14 16:42:59.000000000 -0600 @@ -122,6 +122,13 @@ AC_ARG_ENABLE(debug-malloc, [Define this if you want to debug scm_must_malloc/realloc/free calls.]) fi) +AC_ARG_ENABLE(debug-locks, + [ --enable-debug-locks include thread lock debugging code], + if test "$enable_debug_locks" = y || test "$enable_debug_locks" = yes; then + AC_DEFINE(GUILE_DEBUG_LOCKS, 1, + [Define this if you want to debug pthread lock nesting and deadlock trouble.]) + fi) + SCM_I_GSC_GUILE_DEBUG=0 AC_ARG_ENABLE(guile-debug, [AC_HELP_STRING([--enable-guile-debug], @@ -263,6 +270,10 @@ if test "$enable_debug_malloc" = yes; th AC_LIBOBJ([debug-malloc]) fi +if test "$enable_debug_locks" = yes; then + AC_LIBOBJ([debug-locks]) +fi + if test "$enable_elisp" = yes; then SCM_I_GSC_ENABLE_ELISP=1 else Index: guile-1.8.5/config.h.in =================================================================== --- guile-1.8.5.orig/config.h.in 2008-11-14 15:22:54.000000000 -0600 +++ guile-1.8.5/config.h.in 2008-11-14 15:23:38.000000000 -0600 @@ -42,6 +42,9 @@ Boston, MA 02110-1301, USA. /* Define this if you want to debug scm_must_malloc/realloc/free calls. */ #undef GUILE_DEBUG_MALLOC +/* Define this if you want to debug thread locking and deadlocks. */ +#undef GUILE_DEBUG_LOCKS + /* The imaginary unit (positive square root of -1). */ #undef GUILE_I Index: guile-1.8.5/libguile/Makefile.am =================================================================== --- guile-1.8.5.orig/libguile/Makefile.am 2008-11-14 16:24:29.000000000 -0600 +++ guile-1.8.5/libguile/Makefile.am 2008-11-14 16:25:59.000000000 -0600 @@ -159,7 +159,7 @@ EXTRA_libguile_la_SOURCES = _scm.h \ inet_aton.c memmove.c putenv.c strerror.c \ dynl.c regex-posix.c \ filesys.c posix.c net_db.c socket.c \ - debug-malloc.c mkstemp.c \ + debug-locks.c debug-malloc.c mkstemp.c \ win32-uname.c win32-dirent.c win32-socket.c ## delete guile-snarf.awk from the installation bindir, in case it's Index: guile-1.8.5/libguile/debug-locks.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ guile-1.8.5/libguile/debug-locks.c 2008-11-14 17:52:32.000000000 -0600 @@ -0,0 +1,111 @@ + +/* + * debug-locks.c + * + * utilities for debugging deadlocks + */ + +#include +#include +#include "_scm.h" + +extern void prt_lockholders(void); + +int guile_do_abort_on_badlock = 1; + +void prt_one_lockholder(scm_i_thread *); +void prt_one_lockholder(scm_i_thread *t) +{ + int i, j; + + printf ("\nThread xx\n"); + for (i=0; ilockname[i]) break; + printf("%d: %s (%p) in:\n", i, t->lockname[i], t->lockmutex[i]); + for (j=0; jlockholder[i][j]) break; + printf("\t%s\n", t->lockholder[i][j]); + } + } +} + + +int scm_i_pthread_mutex_lock_dbg(pthread_mutex_t *mtx, const char *lockstr) +{ + int i,j; + int rc = pthread_mutex_lock(mtx); + scm_i_thread *tp = SCM_I_CURRENT_THREAD; + + if (NULL == tp) + return rc; + + for (i=0; ilockname[i]) + { + void * b[TRACE_STACK_DEPTH+1]; + int sz = backtrace(b, TRACE_STACK_DEPTH+1); + char **s = backtrace_symbols(b, sz); + for (j=0; jlockholder[i][j] = s[j+1]; + } + tp->lockname[i] = lockstr; + tp->lockmutex[i] = mtx; + break; + } + } + + if (LOCK_STACK_DEPTH <= i) + { + fprintf(stderr, "Error: thread is holding too many locks\n"); + prt_lockholders(); + if (guile_do_abort_on_badlock) abort(); + } + + return rc; +} + +int scm_i_pthread_mutex_unlock_dbg(pthread_mutex_t *mtx, const char * lockstr) +{ + int i,j; + + scm_i_thread *tp = SCM_I_CURRENT_THREAD; + if (NULL == tp) + return pthread_mutex_unlock(mtx); + + for(i=LOCK_STACK_DEPTH-1; i>=0; i--) + { + if (0x0 == tp->lockname[i]) + continue; + + if (tp->lockmutex[i] != mtx) + { + fprintf(stderr, "Error: unlocking is badly nested: " + "Attempting to unlock %s (%p)\n", + lockstr, mtx); + prt_one_lockholder(tp); + // prt_lockholders(); + if (guile_do_abort_on_badlock) abort(); + } + tp->lockname[i] = NULL; + tp->lockmutex[i] = NULL; + for (j=0; jlockholder[i][j]); + tp->lockholder[i][j] = NULL; + } + break; + } + + if (0 > i) + { + fprintf(stderr, "Error: unlocking a lock that's not held\n"); + prt_lockholders(); + if (guile_do_abort_on_badlock) abort(); + } + + return pthread_mutex_unlock(mtx); +}