From 4cb3a1e2e3563a1dc0969bce3edd84918067d199 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 16 Jul 2015 00:48:40 -0700 Subject: [PATCH] Better heuristic for C stack overflow Improve the heuristic for distinguishing stack overflows from other SIGSEGV causes (Bug#21004). Corinna Vinschen explained that the getrlimit method wasn't portable to Cygwin; see: https://www.cygwin.com/ml/cygwin/2015-07/msg00092.html Corinna suggested pthread_getattr_np but this also has problems. Instead, replace the low-level system stuff with a simple heuristic based on known good stack addresses. * src/eval.c, src/lisp.h (near_C_stack_top): New function. * src/sysdep.c: Don't include <sys/resource.h>. (stack_direction): Remove. All uses removed. (stack_overflow): New function. (handle_sigsegv): Use it instead of incorrect getrlimit heuristic. Make SEGV fatal in non-main threads. --- src/eval.c | 6 ++++ src/lisp.h | 1 + src/sysdep.c | 93 ++++++++++++++++++++++++++++++++++++------------------------ 3 files changed, 63 insertions(+), 37 deletions(-) diff --git a/src/eval.c b/src/eval.c index 4f7f42f..9bdcf4b 100644 --- a/src/eval.c +++ b/src/eval.c @@ -200,6 +200,12 @@ backtrace_next (union specbinding *pdl) return pdl; } +/* Return a pointer to somewhere near the top of the C stack. 
*/ +void * +near_C_stack_top (void) +{ + return backtrace_args (backtrace_top ()); +} void init_eval_once (void) diff --git a/src/lisp.h b/src/lisp.h index c3289c9..341603f 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4029,6 +4029,7 @@ extern _Noreturn void verror (const char *, va_list) ATTRIBUTE_FORMAT_PRINTF (1, 0); extern void un_autoload (Lisp_Object); extern Lisp_Object call_debugger (Lisp_Object arg); +extern void *near_C_stack_top (void); extern void init_eval_once (void); extern Lisp_Object safe_call (ptrdiff_t, Lisp_Object, ...); extern Lisp_Object safe_call1 (Lisp_Object, Lisp_Object); diff --git a/src/sysdep.c b/src/sysdep.c index 91036f0..30a55f1 100644 --- a/src/sysdep.c +++ b/src/sysdep.c @@ -79,9 +79,6 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ #include "msdos.h" #endif -#ifdef HAVE_SYS_RESOURCE_H -#include <sys/resource.h> -#endif #include <sys/param.h> #include <sys/file.h> #include <fcntl.h> @@ -1625,14 +1622,58 @@ handle_arith_signal (int sig) #ifdef HAVE_STACK_OVERFLOW_HANDLING -/* -1 if stack grows down as expected on most OS/ABI variants, 1 otherwise. */ - -static int stack_direction; - /* Alternate stack used by SIGSEGV handler below. */ static unsigned char sigsegv_stack[SIGSTKSZ]; + +/* Return true if SIGINFO indicates a stack overflow. */ + +static bool +stack_overflow (siginfo_t *siginfo) +{ + /* In theory, a more-accurate heuristic can be obtained by using + GNU/Linux pthread_getattr_np along with POSIX pthread_attr_getstack + and pthread_attr_getguardsize to find the location and size of the + guard area. In practice, though, these functions are so hard to + use reliably that they're not worth bothering with. E.g., see: + https://sourceware.org/bugzilla/show_bug.cgi?id=16291 + Other operating systems also have problems, e.g., Solaris's + stack_violation function is tailor-made for this problem, but it + doesn't work on Solaris 11.2 x86-64 with a 32-bit executable. + + GNU libsigsegv is overkill for Emacs; otherwise it might be a + candidate here. 
*/ + + if (!siginfo) + return false; + + /* The faulting address. */ + char *addr = siginfo->si_addr; + if (!addr) + return false; + + /* The known top and bottom of the stack. The actual stack may + extend a bit beyond these boundaries. */ + char *bot = stack_bottom; + char *top = near_C_stack_top (); + + /* Log base 2 of the stack heuristic ratio. This ratio is the size + of the known stack divided by the size of the guard area past the + end of the stack top. The heuristic is that a bad address is + considered to be a stack overflow if it occurs within + stacksize>>LG_STACK_HEURISTIC bytes above the top of the known + stack. This heuristic is not exactly correct but it's good + enough in practice. */ + enum { LG_STACK_HEURISTIC = 8 }; + + if (bot < top) + return 0 <= addr - top && addr - top < (top - bot) >> LG_STACK_HEURISTIC; + else + return 0 <= top - addr && top - addr < (bot - top) >> LG_STACK_HEURISTIC; +} + + /* Attempt to recover from SIGSEGV caused by C stack overflow. */ static void @@ -1640,35 +1681,15 @@ handle_sigsegv (int sig, siginfo_t *siginfo, void *arg) { /* Hard GC error may lead to stack overflow caused by too nested calls to mark_object. No way to survive. */ - if (!gc_in_progress) - { - struct rlimit rlim; + bool fatal = gc_in_progress; - if (!getrlimit (RLIMIT_STACK, &rlim)) - { - /* STACK_DANGER_ZONE has to be bigger than 16K on Cygwin, for - reasons explained in - https://www.cygwin.com/ml/cygwin/2015-06/msg00381.html. */ -#ifdef CYGWIN - enum { STACK_DANGER_ZONE = 32 * 1024 }; -#else - enum { STACK_DANGER_ZONE = 16 * 1024 }; -#endif - char *beg, *end, *addr; - - beg = stack_bottom; - end = stack_bottom + stack_direction * rlim.rlim_cur; - if (beg > end) - addr = beg, beg = end, end = addr; - addr = (char *) siginfo->si_addr; - /* If we're somewhere on stack and too close to - one of its boundaries, most likely this is it. 
*/ - if (beg < addr && addr < end - && (addr - beg < STACK_DANGER_ZONE - || end - addr < STACK_DANGER_ZONE)) - siglongjmp (return_to_command_loop, 1); - } - } +#ifdef FORWARD_SIGNAL_TO_MAIN_THREAD + if (!fatal && !pthread_equal (pthread_self (), main_thread)) + fatal = true; +#endif + + if (!fatal && stack_overflow (siginfo)) + siglongjmp (return_to_command_loop, 1); /* Otherwise we can't do anything with this. */ deliver_fatal_thread_signal (sig); @@ -1683,8 +1704,6 @@ init_sigsegv (void) struct sigaction sa; stack_t ss; - stack_direction = ((char *) &ss < stack_bottom) ? -1 : 1; - ss.ss_sp = sigsegv_stack; ss.ss_size = sizeof (sigsegv_stack); ss.ss_flags = 0; -- 2.1.0