>From 1016c78fe51183ed4d89eeb90c1c06f43c23cbb8 Mon Sep 17 00:00:00 2001 From: Noam Postavsky Date: Mon, 17 Oct 2016 22:17:27 -0400 Subject: [PATCH v1] Fix handling of allocation in regex matching `re_match_2_internal' uses pointers to the lisp objects that it searches. Since it may call malloc when growing the "fail stack", these pointers may be invalidated while searching, resulting in memory corruption (Bug #24358). To fix this, we check the pointer that the lisp object points to before and after growing the stack, and update existing pointers accordingly. This means that all callers of regex searching functions must pass a reference to the lisp object that they search. Callers searching pure C strings that can't relocate pass Qnil. To reduce the need for preprocessor conditionals, we define Lisp_Object as an enum with just the value NO_LISP when building regex.c for non-emacs programs (etags). * src/regex.c (STR_BASE_PTR): New macro. (ENSURE_FAIL_STACK): Use it to update pointers after growing the stack. (re_search, re_search_2, re_match_2, re_match_2_internal): Add BASE parameter. * src/dired.c (directory_files_internal): * src/search.c (looking_at_1, string_match_1): (fast_string_match_internal, fast_looking_at, search_buffer): Pass the searched lisp object to re_search, re_search_2, re_match_2 as the BASE parameter. * src/search.c (fast_c_string_match_ignore_case): Pass Qnil as BASE parameter. * lib-src/etags.c (regex_tag_multiline): * src/regex.c (re_match, regexec) [!emacs]: Pass dummy NO_LISP arg for BASE. * src/regex.h (Lisp_Object) [!emacs]: New single-valued enum. 
--- lib-src/etags.c | 2 +- src/dired.c | 2 +- src/regex.c | 79 +++++++++++++++++++++++++++++++++++++++++---------------- src/regex.h | 12 ++++++++- src/search.c | 45 +++++++++++++++++++------------- 5 files changed, 98 insertions(+), 42 deletions(-) diff --git a/lib-src/etags.c b/lib-src/etags.c index 1457700..c8fffc2 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -6304,7 +6304,7 @@ regex_tag_multiline (void) while (match >= 0 && match < filebuf.len) { - match = re_search (rp->pat, buffer, filebuf.len, charno, + match = re_search (rp->pat, NO_LISP, buffer, filebuf.len, charno, filebuf.len - match, &rp->regs); switch (match) { diff --git a/src/dired.c b/src/dired.c index dba575c..a558aa2 100644 --- a/src/dired.c +++ b/src/dired.c @@ -259,7 +259,7 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full, QUIT; bool wanted = (NILP (match) - || re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0); + || re_search (bufp, name, SSDATA (name), len, 0, len, 0) >= 0); immediate_quit = 0; diff --git a/src/regex.c b/src/regex.c index 164eb46..659b1f9 100644 --- a/src/regex.c +++ b/src/regex.c @@ -533,12 +533,11 @@ init_syntax_once (void) typedef char boolean; -static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp, - re_char *string1, size_t size1, - re_char *string2, size_t size2, - ssize_t pos, - struct re_registers *regs, - ssize_t stop); +static regoff_t +re_match_2_internal (struct re_pattern_buffer *bufp, Lisp_Object string_base, + const_re_char *string1, size_t size1, + const_re_char *string2, size_t size2, + ssize_t pos, struct re_registers *regs, ssize_t stop); /* These are the command codes that appear in compiled regular expressions. Some opcodes are followed by argument bytes. A @@ -1436,11 +1435,38 @@ WEAK_ALIAS (__re_set_syntax, re_set_syntax) #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer #define TOP_FAILURE_HANDLE() fail_stack.frame +#ifdef emacs +#define STR_BASE_PTR(obj) \ + (BUFFERP (obj)? 
XBUFFER (obj)->text->beg : \ + STRINGP (obj)? SDATA (obj) : \ + NULL) +#else +#define STR_BASE_PTR(obj) ((re_char*)0) +#endif #define ENSURE_FAIL_STACK(space) \ while (REMAINING_AVAIL_SLOTS <= space) { \ + re_char* orig_base = STR_BASE_PTR (string_base); \ if (!GROW_FAIL_STACK (fail_stack)) \ - return -2; \ + return -2; \ + /* GROW_FAIL_STACK may call malloc and relocate the string */ \ + /* pointers. */ \ + ptrdiff_t delta = STR_BASE_PTR (string_base) - orig_base; \ + if (string1) \ + { \ + string1 += delta; \ + end1 += delta; \ + end_match_1 += delta; \ + } \ + if (string2) \ + { \ + string2 += delta; \ + end2 += delta; \ + end_match_2 += delta; \ + } \ + d += delta; \ + dend += delta; \ + dfail += delta; \ DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ } @@ -4222,10 +4248,11 @@ WEAK_ALIAS (__re_set_registers, re_set_registers) doesn't let you say where to stop matching. */ regoff_t -re_search (struct re_pattern_buffer *bufp, const char *string, size_t size, +re_search (struct re_pattern_buffer *bufp, + Lisp_Object base, const char *string, size_t size, ssize_t startpos, ssize_t range, struct re_registers *regs) { - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, + return re_search_2 (bufp, base, NULL, 0, string, size, startpos, range, regs, size); } WEAK_ALIAS (__re_search, re_search) @@ -4260,8 +4287,10 @@ WEAK_ALIAS (__re_search, re_search) stack overflow). 
*/ regoff_t -re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, - const char *str2, size_t size2, ssize_t startpos, ssize_t range, +re_search_2 (struct re_pattern_buffer *bufp, Lisp_Object str_base, + const char *str1, size_t size1, + const char *str2, size_t size2, + ssize_t startpos, ssize_t range, struct re_registers *regs, ssize_t stop) { regoff_t val; @@ -4443,7 +4472,8 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, && !bufp->can_be_null) return -1; - val = re_match_2_internal (bufp, string1, size1, string2, size2, + val = re_match_2_internal (bufp, str_base, + string1, size1, string2, size2, startpos, regs, stop); if (val >= 0) @@ -4879,8 +4909,10 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, const_re_char *p1, re_match (struct re_pattern_buffer *bufp, const char *string, size_t size, ssize_t pos, struct re_registers *regs) { - regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, - size, pos, regs, size); + regoff_t result = re_match_2_internal (bufp, NO_LISP, + NULL, 0, + (re_char*) string, size, + pos, regs, size); return result; } WEAK_ALIAS (__re_match, re_match) @@ -4906,9 +4938,10 @@ WEAK_ALIAS (__re_match, re_match) matched substring. 
*/ regoff_t -re_match_2 (struct re_pattern_buffer *bufp, const char *string1, - size_t size1, const char *string2, size_t size2, ssize_t pos, - struct re_registers *regs, ssize_t stop) +re_match_2 (struct re_pattern_buffer *bufp, Lisp_Object base, + const char *string1, size_t size1, + const char *string2, size_t size2, + ssize_t pos, struct re_registers *regs, ssize_t stop) { regoff_t result; @@ -4919,8 +4952,9 @@ re_match_2 (struct re_pattern_buffer *bufp, const char *string1, SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); #endif - result = re_match_2_internal (bufp, (re_char*) string1, size1, - (re_char*) string2, size2, + result = re_match_2_internal (bufp, base, + (re_char*) string1, size1, + (re_char*) string2, size2, pos, regs, stop); return result; } @@ -4930,8 +4964,9 @@ WEAK_ALIAS (__re_match_2, re_match_2) /* This is a separate function so that we can force an alloca cleanup afterwards. */ static regoff_t -re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1, - size_t size1, const_re_char *string2, size_t size2, +re_match_2_internal (struct re_pattern_buffer *bufp, Lisp_Object string_base, + const_re_char *string1, size_t size1, + const_re_char *string2, size_t size2, ssize_t pos, struct re_registers *regs, ssize_t stop) { /* General temporaries. */ @@ -6572,7 +6607,7 @@ regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string, by '\n' which would throw things off. */ /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, + ret = re_search (&private_preg, NO_LISP, string, len, /* start: */ 0, /* range: */ len, want_reg_info ? &regs : 0); diff --git a/src/regex.h b/src/regex.h index 817167a..4810bc4 100644 --- a/src/regex.h +++ b/src/regex.h @@ -469,13 +469,21 @@ extern const char *re_compile_pattern (const char *__pattern, size_t __length, internal error. 
*/ extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); +#ifndef emacs +/* Define Lisp_Object outside of emacs, just so something can be + passed as the BASE parameter to re_search and re_match. */ +typedef enum { NO_LISP } Lisp_Object; +#endif /* Search in the string STRING (with length LENGTH) for the pattern compiled into BUFFER. Start searching at position START, for RANGE characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). */ + information in REGS (if REGS and BUFFER->no_sub are nonzero). If + STRING is a pointer into a lisp object, pass the object as BASE in + order to correctly handle relocation if re_search calls malloc. */ extern regoff_t re_search (struct re_pattern_buffer *__buffer, + Lisp_Object __base, const char *__string, size_t __length, ssize_t __start, ssize_t __range, struct re_registers *__regs); @@ -484,6 +492,7 @@ extern regoff_t re_search (struct re_pattern_buffer *__buffer, /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer, + Lisp_Object __base, const char *__string1, size_t __length1, const char *__string2, size_t __length2, ssize_t __start, ssize_t __range, @@ -500,6 +509,7 @@ extern regoff_t re_match (struct re_pattern_buffer *__buffer, /* Relates to `re_match' as `re_search_2' relates to `re_search'. 
*/ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer, + Lisp_Object __base, const char *__string1, size_t __length1, const char *__string2, size_t __length2, ssize_t __start, struct re_registers *__regs, diff --git a/src/search.c b/src/search.c index dc7e2d8..3d3d355 100644 --- a/src/search.c +++ b/src/search.c @@ -287,8 +287,10 @@ looking_at_1 (Lisp_Object string, bool posix) immediate_quit = 1; QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ - /* Get pointers and sizes of the two strings - that make up the visible portion of the buffer. */ + /* Get pointers and sizes of the two strings that make up the + visible portion of the buffer. Note that we can use pointers + here, unlike in search_buffer, because we only call re_match_2 + once. */ p1 = BEGV_ADDR; s1 = GPT_BYTE - BEGV_BYTE; @@ -308,7 +310,8 @@ looking_at_1 (Lisp_Object string, bool posix) re_match_object = Qnil; - i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2, + i = re_match_2 (bufp, Fcurrent_buffer (), + (char *) p1, s1, (char *) p2, s2, PT_BYTE - BEGV_BYTE, (NILP (Vinhibit_changing_match_data) ? 
&search_regs : NULL), @@ -401,7 +404,7 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, immediate_quit = 1; re_match_object = string; - val = re_search (bufp, SSDATA (string), + val = re_search (bufp, string, SSDATA (string), SBYTES (string), pos_byte, SBYTES (string) - pos_byte, (NILP (Vinhibit_changing_match_data) @@ -473,7 +476,7 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string, immediate_quit = 1; re_match_object = string; - val = re_search (bufp, SSDATA (string), + val = re_search (bufp, string, SSDATA (string), SBYTES (string), 0, SBYTES (string), 0); immediate_quit = 0; @@ -498,7 +501,7 @@ fast_c_string_match_ignore_case (Lisp_Object regexp, Vascii_canon_table, 0, 0); immediate_quit = 1; - val = re_search (bufp, string, len, 0, len, 0); + val = re_search (bufp, Qnil, string, len, 0, len, 0); immediate_quit = 0; return val; } @@ -561,7 +564,8 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, buf = compile_pattern (regexp, 0, Qnil, 0, multibyte); immediate_quit = 1; - len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2, + len = re_match_2 (buf, Fcurrent_buffer (), + (char *) p1, s1, (char *) p2, s2, pos_byte, NULL, limit_byte); immediate_quit = 0; @@ -1178,8 +1182,8 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) { - unsigned char *p1, *p2; - ptrdiff_t s1, s2; + unsigned char *base; + ptrdiff_t off1, off2, s1, s2; struct re_pattern_buffer *bufp; bufp = compile_pattern (string, @@ -1193,16 +1197,19 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, can take too long. */ QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ - /* Get pointers and sizes of the two strings - that make up the visible portion of the buffer. */ + /* Get offsets and sizes of the two strings that make up the + visible portion of the buffer. 
We compute offsets instead of + pointers because re_search_2 may call malloc and therefore + change the buffer text address. */ - p1 = BEGV_ADDR; + base = current_buffer->text->beg; + off1 = BEGV_ADDR - base; s1 = GPT_BYTE - BEGV_BYTE; - p2 = GAP_END_ADDR; + off2 = GAP_END_ADDR - base; s2 = ZV_BYTE - GPT_BYTE; if (s1 < 0) { - p2 = p1; + off2 = off1; s2 = ZV_BYTE - BEGV_BYTE; s1 = 0; } @@ -1217,7 +1224,9 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, { ptrdiff_t val; - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, + val = re_search_2 (bufp, Fcurrent_buffer (), + (char*) (base + off1), s1, + (char*) (base + off2), s2, pos_byte - BEGV_BYTE, lim_byte - pos_byte, (NILP (Vinhibit_changing_match_data) ? &search_regs : &search_regs_1), @@ -1262,8 +1271,10 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, { ptrdiff_t val; - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, - pos_byte - BEGV_BYTE, lim_byte - pos_byte, + val = re_search_2 (bufp, Fcurrent_buffer (), + (char*) (base + off1), s1, + (char*) (base + off2), s2, + pos_byte - BEGV_BYTE, lim_byte - pos_byte, (NILP (Vinhibit_changing_match_data) ? &search_regs : &search_regs_1), lim_byte - BEGV_BYTE); -- 2.9.3