gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-678-g7bda05c


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-678-g7bda05c
Date: Tue, 28 Apr 2015 06:28:38 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-4.1-stable, has been updated
       via  7bda05c66848de97a7b43aa3e37ff4336f1b3220 (commit)
      from  454ae7c0f350842ab40a30ff4a2643cd76e8e277 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=7bda05c66848de97a7b43aa3e37ff4336f1b3220

commit 7bda05c66848de97a7b43aa3e37ff4336f1b3220
Author: Arnold D. Robbins <address@hidden>
Date:   Tue Apr 28 09:28:04 2015 +0300

    Fix bracket handling. "This time for sure."

diff --git a/ChangeLog b/ChangeLog
index e5d473c..3e9764a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-04-28         Arnold D. Robbins     <address@hidden>
+
+       * awkgram.y (yylex): Rework the bracket handling from zero.
+       Thanks to Michal Jaegermann for yet another test case.
+
 2015-04-27         Arnold D. Robbins     <address@hidden>
 
        * awkgram.y (yylex): Make change of Jan 7 for parsing regexps
diff --git a/awkgram.c b/awkgram.c
index 530aa27..14e29d9 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -5358,21 +5358,24 @@ yylex(void)
        thisline = NULL;
        if (want_regexp) {
                int in_brack = 0;       /* count brackets, [[:alnum:]] allowed 
*/
+               int b_index = -1;
+               int cur_index = 0;
+
                /*
-                * Counting brackets is non-trivial. [[] is ok,
-                * and so is [\]], with a point being that /[/]/ as a regexp
-                * constant has to work.
+                * Here is what's ok with brackets:
+                *
+                * [[] [^[] []] [^]] [.../...]
+                * [...\[...] [...\]...] [...\/...]
+                * 
+                * (Remember that all of the above are inside /.../)
+                *
+                * The code for \ handles \[, \] and \/.
                 *
-                * Do not count [ or ] if either one is preceded by a \.
-                * A `[' should be counted if
-                *  a) it is the first one so far (in_brack == 0)
-                *  b) it is the `[' in `[:'
-                * A ']' should be counted if not preceded by a \, since
-                * it is either closing `:]' or just a plain list.
-                * According to POSIX, []] is how you put a ] into a set.
-                * Try to handle that too.
+                * Otherwise, track the first open [ position, and if
+                * an embedded [ or ] occurs, allow it to pass through
+                * if it's right after the first [ or after [^.
                 *
-                * The code for \ handles \[ and \].
+                * Whew!
                 */
 
                want_regexp = false;
@@ -5382,17 +5385,21 @@ yylex(void)
 
                        if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch 
(c) {
                        case '[':
-                               /* one day check for `.' and `=' too */
-                               if (nextc(false) == ':' || in_brack == 0)
-                                       in_brack++;
-                               pushback();
-                               break;
                        case ']':
-                               if ((tok[-1] == '[' && tok[-2] != '\\')
-                                   || (tok[-2] == '[' && tok[-3] != '\\' && 
tok[-1] == '^'))
-                                       /* do nothing */;
-                               else
+                               cur_index = tok - tokstart;
+                               if (in_brack > 0
+                                   && (cur_index == b_index + 1 
+                                       || (cur_index == b_index + 2 && tok[-1] 
== '^')))
+                                       ; /* do nothing */
+                               else if (c == '[') {
+                                       in_brack++;
+                                       if (in_brack == 1)
+                                               b_index = tok - tokstart;
+                               } else {
                                        in_brack--;
+                                       if (in_brack == 0)
+                                               b_index = -1;
+                               }
                                break;
                        case '\\':
                                if ((c = nextc(false)) == END_FILE) {
diff --git a/awkgram.y b/awkgram.y
index 31751e8..beb85d5 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -3019,21 +3019,24 @@ yylex(void)
        thisline = NULL;
        if (want_regexp) {
                int in_brack = 0;       /* count brackets, [[:alnum:]] allowed 
*/
+               int b_index = -1;
+               int cur_index = 0;
+
                /*
-                * Counting brackets is non-trivial. [[] is ok,
-                * and so is [\]], with a point being that /[/]/ as a regexp
-                * constant has to work.
+                * Here is what's ok with brackets:
+                *
+                * [[] [^[] []] [^]] [.../...]
+                * [...\[...] [...\]...] [...\/...]
+                * 
+                * (Remember that all of the above are inside /.../)
+                *
+                * The code for \ handles \[, \] and \/.
                 *
-                * Do not count [ or ] if either one is preceded by a \.
-                * A `[' should be counted if
-                *  a) it is the first one so far (in_brack == 0)
-                *  b) it is the `[' in `[:'
-                * A ']' should be counted if not preceded by a \, since
-                * it is either closing `:]' or just a plain list.
-                * According to POSIX, []] is how you put a ] into a set.
-                * Try to handle that too.
+                * Otherwise, track the first open [ position, and if
+                * an embedded [ or ] occurs, allow it to pass through
+                * if it's right after the first [ or after [^.
                 *
-                * The code for \ handles \[ and \].
+                * Whew!
                 */
 
                want_regexp = false;
@@ -3043,17 +3046,21 @@ yylex(void)
 
                        if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch 
(c) {
                        case '[':
-                               /* one day check for `.' and `=' too */
-                               if (nextc(false) == ':' || in_brack == 0)
-                                       in_brack++;
-                               pushback();
-                               break;
                        case ']':
-                               if ((tok[-1] == '[' && tok[-2] != '\\')
-                                   || (tok[-2] == '[' && tok[-3] != '\\' && 
tok[-1] == '^'))
-                                       /* do nothing */;
-                               else
+                               cur_index = tok - tokstart;
+                               if (in_brack > 0
+                                   && (cur_index == b_index + 1 
+                                       || (cur_index == b_index + 2 && tok[-1] 
== '^')))
+                                       ; /* do nothing */
+                               else if (c == '[') {
+                                       in_brack++;
+                                       if (in_brack == 1)
+                                               b_index = tok - tokstart;
+                               } else {
                                        in_brack--;
+                                       if (in_brack == 0)
+                                               b_index = -1;
+                               }
                                break;
                        case '\\':
                                if ((c = nextc(false)) == END_FILE) {

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog |    5 +++++
 awkgram.c |   49 ++++++++++++++++++++++++++++---------------------
 awkgram.y |   49 ++++++++++++++++++++++++++++---------------------
 3 files changed, 61 insertions(+), 42 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]