>From c3e87ee1e1399f3687db02dc71b13830852a50eb Mon Sep 17 00:00:00 2001 From: Noam Postavsky Date: Mon, 24 Oct 2016 19:54:29 -0400 Subject: [PATCH v4 1/2] Revert fixes to allocation of regex matching The fix was not complete, and completing it was proving too complicated. - Revert "* src/regex.c (re_search_2): Make new code safe for -Wjump-misses-init." This reverts commit c2a17924a57483d14692c8913edbe8ad24b5ffbb. - Revert "Port to GCC 6.2.1 + --enable-gcc-warnings" This reverts commit f6134bbda259c115c06d4a9a3ab5c39340a15949. - Revert "Fix handling of allocation in regex matching" This reverts commit ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897. - Revert "Fix handling of buffer relocation in regex.c functions" This reverts commit ee04aedc723b035eedaf975422d4eb242894121b. --- src/dired.c | 4 +--- src/regex.c | 73 ------------------------------------------------------------ src/regex.h | 4 +--- src/search.c | 40 ++++++++++----------------------- 4 files changed, 14 insertions(+), 107 deletions(-) diff --git a/src/dired.c b/src/dired.c index 006f74c..dba575c 100644 --- a/src/dired.c +++ b/src/dired.c @@ -259,11 +259,9 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full, QUIT; bool wanted = (NILP (match) - || (re_match_object = name, - re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0)); + || re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0); immediate_quit = 0; - re_match_object = Qnil; /* Stop protecting name from GC. */ if (wanted) { diff --git a/src/regex.c b/src/regex.c index b12e95b..56b18e6 100644 --- a/src/regex.c +++ b/src/regex.c @@ -1438,62 +1438,11 @@ WEAK_ALIAS (__re_set_syntax, re_set_syntax) #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer #define TOP_FAILURE_HANDLE() fail_stack.frame -#ifdef emacs -# define STR_BASE_PTR(obj) \ - (NILP (obj) ? current_buffer->text->beg \ - : STRINGP (obj) ? SDATA (obj) \ - : NULL) -#else -# define STR_BASE_PTR(obj) NULL -#endif #define ENSURE_FAIL_STACK(space) \ while (REMAINING_AVAIL_SLOTS <= space) { \ - re_char *orig_base = STR_BASE_PTR (re_match_object); \ - bool might_relocate = orig_base != NULL; \ - ptrdiff_t string1_off, end1_off, end_match_1_off; \ - ptrdiff_t string2_off, end2_off, end_match_2_off; \ - ptrdiff_t d_off, dend_off, dfail_off; \ - if (might_relocate) \ - { \ - if (string1) \ - { \ - string1_off = string1 - orig_base; \ - end1_off = end1 - orig_base; \ - end_match_1_off = end_match_1 - orig_base; \ - } \ - if (string2) \ - { \ - string2_off = string2 - orig_base; \ - end2_off = end2 - orig_base; \ - end_match_2_off = end_match_2 - orig_base; \ - } \ - d_off = d - orig_base; \ - dend_off = dend - orig_base; \ - dfail_off = dfail - orig_base; \ - } \ if (!GROW_FAIL_STACK (fail_stack)) \ return -2; \ - /* In Emacs, GROW_FAIL_STACK might relocate string pointers. */ \ - if (might_relocate) \ - { \ - re_char *new_base = STR_BASE_PTR (re_match_object); \ - if (string1) \ - { \ - string1 = new_base + string1_off; \ - end1 = new_base + end1_off; \ - end_match_1 = new_base + end_match_1_off; \ - } \ - if (string2) \ - { \ - string2 = new_base + string2_off; \ - end2 = new_base + end2_off; \ - end_match_2 = new_base + end_match_2_off; \ - } \ - d = new_base + d_off; \ - dend = new_base + dend_off; \ - dfail = new_base + dfail_off; \ - } \ DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ } @@ -4380,10 +4329,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, /* Loop through the string, looking for a place to start matching. */ for (;;) { - ptrdiff_t offset1, offset2; - re_char *orig_base; - bool might_relocate; - /* If the pattern is anchored, skip quickly past places we cannot match. We don't bother to treat startpos == 0 specially @@ -4500,17 +4445,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, && !bufp->can_be_null) return -1; - /* re_match_2_internal may allocate, relocating the Lisp text - object that we're searching. */ - IF_LINT (offset2 = 0); /* Work around GCC bug 78081. */ - orig_base = STR_BASE_PTR (re_match_object); - might_relocate = orig_base != NULL; - if (might_relocate) - { - if (string1) offset1 = string1 - orig_base; - if (string2) offset2 = string2 - orig_base; - } - val = re_match_2_internal (bufp, string1, size1, string2, size2, startpos, regs, stop); @@ -4520,13 +4454,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, if (val == -2) return -2; - if (might_relocate) - { - re_char *new_base = STR_BASE_PTR (re_match_object); - if (string1) string1 = offset1 + new_base; - if (string2) string2 = offset2 + new_base; - } - advance: if (!range) break; diff --git a/src/regex.h b/src/regex.h index 61c771c..51f4424 100644 --- a/src/regex.h +++ b/src/regex.h @@ -169,9 +169,7 @@ extern reg_syntax_t re_syntax_options; #ifdef emacs # include "lisp.h" /* In Emacs, this is the string or buffer in which we are matching. - It is used for looking up syntax properties, and also to recompute - pointers in case the object is relocated as a side effect of - calling malloc (if it calls r_alloc_sbrk in ralloc.c). + It is used for looking up syntax properties. If the value is a Lisp string object, we are matching text in that string; if it's nil, we are matching text in the current buffer; if diff --git a/src/search.c b/src/search.c index b50e7f0..fa5ac44 100644 --- a/src/search.c +++ b/src/search.c @@ -287,10 +287,8 @@ looking_at_1 (Lisp_Object string, bool posix) immediate_quit = 1; QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ - /* Get pointers and sizes of the two strings that make up the - visible portion of the buffer. Note that we can use pointers - here, unlike in search_buffer, because we only call re_match_2 - once, after which we never use the pointers again. */ + /* Get pointers and sizes of the two strings + that make up the visible portion of the buffer. */ p1 = BEGV_ADDR; s1 = GPT_BYTE - BEGV_BYTE; @@ -409,7 +407,6 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, (NILP (Vinhibit_changing_match_data) ? &search_regs : NULL)); immediate_quit = 0; - re_match_object = Qnil; /* Stop protecting string from GC. */ /* Set last_thing_searched only when match data is changed. */ if (NILP (Vinhibit_changing_match_data)) @@ -480,7 +477,6 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string, SBYTES (string), 0, SBYTES (string), 0); immediate_quit = 0; - re_match_object = Qnil; /* Stop protecting string from GC. */ return val; } @@ -568,7 +564,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2, pos_byte, NULL, limit_byte); immediate_quit = 0; - re_match_object = Qnil; /* Stop protecting string from GC. */ return len; } @@ -1183,8 +1178,8 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) { - unsigned char *base; - ptrdiff_t off1, off2, s1, s2; + unsigned char *p1, *p2; + ptrdiff_t s1, s2; struct re_pattern_buffer *bufp; bufp = compile_pattern (string, @@ -1198,19 +1193,16 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, can take too long. */ QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ - /* Get offsets and sizes of the two strings that make up the - visible portion of the buffer. We compute offsets instead of - pointers because re_search_2 may call malloc and therefore - change the buffer text address. */ + /* Get pointers and sizes of the two strings + that make up the visible portion of the buffer. */ - base = current_buffer->text->beg; - off1 = BEGV_ADDR - base; + p1 = BEGV_ADDR; s1 = GPT_BYTE - BEGV_BYTE; - off2 = GAP_END_ADDR - base; + p2 = GAP_END_ADDR; s2 = ZV_BYTE - GPT_BYTE; if (s1 < 0) { - off2 = off1; + p2 = p1; s2 = ZV_BYTE - BEGV_BYTE; s1 = 0; } @@ -1225,16 +1217,12 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, { ptrdiff_t val; - val = re_search_2 (bufp, - (char*) (base + off1), s1, - (char*) (base + off2), s2, + val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, pos_byte - BEGV_BYTE, lim_byte - pos_byte, (NILP (Vinhibit_changing_match_data) ? &search_regs : &search_regs_1), /* Don't allow match past current point */ pos_byte - BEGV_BYTE); - /* Update 'base' due to possible relocation inside re_search_2. */ - base = current_buffer->text->beg; if (val == -2) { matcher_overflow (); @@ -1274,15 +1262,11 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, { ptrdiff_t val; - val = re_search_2 (bufp, - (char*) (base + off1), s1, - (char*) (base + off2), s2, - pos_byte - BEGV_BYTE, lim_byte - pos_byte, + val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, + pos_byte - BEGV_BYTE, lim_byte - pos_byte, (NILP (Vinhibit_changing_match_data) ? &search_regs : &search_regs_1), lim_byte - BEGV_BYTE); - /* Update 'base' due to possible relocation inside re_search_2. */ - base = current_buffer->text->beg; if (val == -2) { matcher_overflow (); -- 2.9.3