Re: %union ... {

bison-patches
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: %union ... {

From:	Akim Demaille
Subject:	Re: %union ... {
Date:	Mon, 19 Jun 2006 18:49:38 +0200
User-agent:	Gnus/5.110006 (No Gnus v0.6) Emacs/21.4 (gnu/linux)
Here is an updated proposal.  To be able to use the ID without
triggering its registration as a nonterminal, I had to move the
conversion from string to symbol out of the scanner and into the
parser.  I prefer it this way anyway; it is so much more flexible.

I'm sorry for the long, useless portions of the patch due to
reindentation and sorting.

I'm installing it.

Index: ChangeLog
from  Akim Demaille  <address@hidden>

        * src/scan-gram.l: No longer "parse" things after `%union' until
        `{'.  Rather, return a single "%union" token.
        No longer make symbols: return strings, and leave the conversion
        to symbols to the parser.
        (SC_PRE_CODE, token_type): Remove.
        * src/parse-gram.y (%union): New field `character'.
        Sort tokens.
        (CHAR): New token.
        (ID, ID_COLON): Now that the scanner no longer makes them
        identifiers, adjust all uses to invoke symbol_get.
        (id_colon): New, wraps the conversion from string to symbol.
        (%union): Accept a possible union_name.
        (symbol): Now can be a char.
        * data/c.m4 (b4_union_name): Leave a default value.
        * data/glr.c, data/yacc.c: Use it.

Index: src/parse-gram.y
===================================================================
RCS file: /cvsroot/bison/bison/src/parse-gram.y,v
retrieving revision 1.75
diff -u -u -r1.75 parse-gram.y
--- src/parse-gram.y 6 Jun 2006 16:40:06 -0000 1.75
+++ src/parse-gram.y 19 Jun 2006 16:46:03 -0000
@@ -99,6 +99,7 @@
   char *chars;
   assoc assoc;
   uniqstr uniqstr;
+  unsigned char character;
 };

 /* Define the tokens together with their human representation.  */
@@ -113,8 +114,6 @@
 %token PERCENT_DESTRUCTOR  "%destructor"
 %token PERCENT_PRINTER     "%printer"

-%token PERCENT_UNION       "%union {...}"
-
 %token PERCENT_LEFT        "%left"
 %token PERCENT_RIGHT       "%right"
 %token PERCENT_NONASSOC    "%nonassoc"
@@ -157,35 +156,37 @@
   PERCENT_YACC            "%yacc"
 ;

-%token TYPE            "type"
+%token BRACED_CODE     "{...}"
+%token CHAR            "char"
+%token EPILOGUE        "epilogue"
 %token EQUAL           "="
-%token SEMICOLON       ";"
-%token PIPE            "|"
 %token ID              "identifier"
 %token ID_COLON        "identifier:"
 %token PERCENT_PERCENT "%%"
+%token PIPE            "|"
 %token PROLOGUE        "%{...%}"
-%token EPILOGUE        "epilogue"
-%token BRACED_CODE     "{...}"
+%token SEMICOLON       ";"
+%token TYPE            "type"
+
+%type <character> CHAR
+%printer { fprintf (stderr, "'%c' (%d)", $$, $$); } CHAR

-%type <chars> STRING string_content
-             "{...}"
-             "%union {...}"
-             PROLOGUE EPILOGUE
-%printer { fprintf (stderr, "\"%s\"", $$); }
-             STRING string_content
-%printer { fprintf (stderr, "{\n%s\n}", $$); }
-             "{...}"
-             "%union {...}"
-             PROLOGUE EPILOGUE
-%type <uniqstr> TYPE
+%type <chars> STRING string_content "{...}" PROLOGUE EPILOGUE
+%printer { fprintf (stderr, "\"%s\"", $$); } STRING string_content
+%printer { fprintf (stderr, "{\n%s\n}", $$); } "{...}" PROLOGUE EPILOGUE
+
+%type <uniqstr> TYPE ID ID_COLON
 %printer { fprintf (stderr, "<%s>", $$); } TYPE
+%printer { fprintf (stderr, "%s", $$); } ID
+%printer { fprintf (stderr, "%s:", $$); } ID_COLON
+
 %type <integer> INT
 %printer { fprintf (stderr, "%d", $$); } INT
-%type <symbol> ID symbol string_as_id
-%printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
-%type <symbol> ID_COLON
-%printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
+
+%type <symbol> id id_colon symbol string_as_id
+%printer { fprintf (stderr, "%s", $$->tag); } id symbol string_as_id
+%printer { fprintf (stderr, "%s:", $$->tag); } id_colon
+
 %type <assoc> precedence_declarator
 %type <list>  symbols.1
 %%
@@ -252,22 +253,6 @@
     {
       grammar_start_symbol_set ($2, @2);
     }
-| "%union {...}"
-    {
-      char const *body = $1;
-
-      if (typed)
-       {
-         /* Concatenate the union bodies, turning the first one's
-            trailing '}' into '\n', and omitting the second one's '{'.  */
-         char *code = muscle_find ("stype");
-         code[strlen (code) - 1] = '\n';
-         body++;
-       }
-
-      typed = true;
-      muscle_code_grow ("stype", body, @1);
-    }
 | "%destructor" "{...}" symbols.1
     {
       symbol_list *list;
@@ -294,6 +279,40 @@
     }
 ;

+
+/*----------*
+ | %union.  |
+ *----------*/
+
+%token PERCENT_UNION "%union";
+
+union_name:
+  /* Nothing. */ {}
+| ID             { muscle_code_grow ("union_name", $1, @1); }
+;
+
+grammar_declaration:
+  "%union" union_name "{...}"
+    {
+      char const *body = $3;
+
+      if (typed)
+       {
+         /* Concatenate the union bodies, turning the first one's
+            trailing '}' into '\n', and omitting the second one's '{'.  */
+         char *code = muscle_find ("stype");
+         code[strlen (code) - 1] = '\n';
+         body++;
+       }
+
+      typed = true;
+      muscle_code_grow ("stype", body, @3);
+    }
+;
+
+
+
+
 symbol_declaration:
   "%nterm" { current_class = nterm_sym; } symbol_defs.1
     {
@@ -352,24 +371,24 @@
      {
        current_type = $1;
      }
-| ID
+| id
      {
        symbol_class_set ($1, current_class, @1, true);
        symbol_type_set ($1, current_type, @1);
      }
-| ID INT
+| id INT
     {
       symbol_class_set ($1, current_class, @1, true);
       symbol_type_set ($1, current_type, @1);
       symbol_user_token_number_set ($1, $2, @2);
     }
-| ID string_as_id
+| id string_as_id
     {
       symbol_class_set ($1, current_class, @1, true);
       symbol_type_set ($1, current_type, @1);
       symbol_make_alias ($1, $2, @$);
     }
-| ID INT string_as_id
+| id INT string_as_id
     {
       symbol_class_set ($1, current_class, @1, true);
       symbol_type_set ($1, current_type, @1);
@@ -406,7 +425,7 @@
 ;

 rules:
-  ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
+  id_colon { current_lhs = $1; current_lhs_location = @1; } rhses.1
 ;

 rhses.1:
@@ -431,9 +450,32 @@
     { grammar_current_rule_merge_set ($3, @3); }
 ;

+
+/*---------------*
+ | Identifiers.  |
+ *---------------*/
+
+/* Identifiers are returned as uniqstr by the scanner.  Depending on
+   their use, we may need to make them genuine symbols.  */
+
+id:
+  ID              { $$ = symbol_get ($1, @1); }
+| CHAR            { char cp[4] = { '\'', $1, '\'', 0 };
+                    $$ = symbol_get (quotearg_style (escape_quoting_style, cp),
+                                    @1);
+                   symbol_class_set ($$, token_sym, @1, false);
+                   symbol_user_token_number_set ($$, $1, @1);
+                  }
+;
+
+id_colon:
+  ID_COLON { $$ = symbol_get ($1, @1); }
+;
+
+
 symbol:
-  ID              { $$ = $1; }
-| string_as_id    { $$ = $1; }
+  id
+| string_as_id
 ;

 /* A string used as an ID: quote it.  */
Index: src/scan-gram.l
===================================================================
RCS file: /cvsroot/bison/bison/src/scan-gram.l,v
retrieving revision 1.90
diff -u -u -r1.90 scan-gram.l
--- src/scan-gram.l 7 Jun 2006 21:17:35 -0000 1.90
+++ src/scan-gram.l 19 Jun 2006 16:46:03 -0000
@@ -88,9 +88,6 @@
  /* A identifier was just read in directives/rules.  Special state
     to capture the sequence `identifier :'. */
 %x SC_AFTER_IDENTIFIER
- /* A keyword that should be followed by some code was read (e.g.
-    %printer). */
-%x SC_PRE_CODE

  /* Three types of user code:
     - prologue (code between `%{' `%}' in the first section, before %%);
@@ -124,9 +121,6 @@
   /* Parent context state, when applicable.  */
   int context_state IF_LINT (= 0);

-  /* Token type to return, when applicable.  */
-  int token_type IF_LINT (= 0);
-
   /* Location of most recent identifier, when applicable.  */
   location id_loc IF_LINT (= empty_location);

@@ -145,7 +139,7 @@
   | Scanning white space.  |
   `-----------------------*/

-<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
+<INITIAL,SC_AFTER_IDENTIFIER>
 {
   /* Comments and white space.  */
   ","         warn_at (*loc, _("stray `,' treated as white space"));
@@ -170,46 +164,46 @@
   `----------------------------*/
 <INITIAL>
 {
-  "%binary"               return PERCENT_NONASSOC;
-  "%debug"                return PERCENT_DEBUG;
-  "%default"[-_]"prec"    return PERCENT_DEFAULT_PREC;
-  "%define"               return PERCENT_DEFINE;
-  "%defines"              return PERCENT_DEFINES;
-  "%destructor"                  /* FIXME: Remove once %union handled differently.  */ token_type = BRACED_CODE; return PERCENT_DESTRUCTOR;
-  "%dprec"               return PERCENT_DPREC;
-  "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
-  "%expect"               return PERCENT_EXPECT;
-  "%expect"[-_]"rr"      return PERCENT_EXPECT_RR;
-  "%file-prefix"          return PERCENT_FILE_PREFIX;
+  "%binary"                        return PERCENT_NONASSOC;
+  "%debug"                         return PERCENT_DEBUG;
+  "%default"[-_]"prec"             return PERCENT_DEFAULT_PREC;
+  "%define"                        return PERCENT_DEFINE;
+  "%defines"                       return PERCENT_DEFINES;
+  "%destructor"                            return PERCENT_DESTRUCTOR;
+  "%dprec"                         return PERCENT_DPREC;
+  "%error"[-_]"verbose"            return PERCENT_ERROR_VERBOSE;
+  "%expect"                        return PERCENT_EXPECT;
+  "%expect"[-_]"rr"                return PERCENT_EXPECT_RR;
+  "%file-prefix"                   return PERCENT_FILE_PREFIX;
   "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
-  "%initial-action"       /* FIXME: Remove once %union handled differently.  */ token_type = BRACED_CODE; return PERCENT_INITIAL_ACTION;
-  "%glr-parser"           return PERCENT_GLR_PARSER;
-  "%left"                 return PERCENT_LEFT;
-  "%lex-param"           /* FIXME: Remove once %union handled differently.  */ token_type = BRACED_CODE; return PERCENT_LEX_PARAM;
-  "%locations"            return PERCENT_LOCATIONS;
-  "%merge"               return PERCENT_MERGE;
-  "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
-  "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
-  "%no"[-_]"lines"        return PERCENT_NO_LINES;
-  "%nonassoc"             return PERCENT_NONASSOC;
-  "%nondeterministic-parser"   return PERCENT_NONDETERMINISTIC_PARSER;
-  "%nterm"                return PERCENT_NTERM;
-  "%output"               return PERCENT_OUTPUT;
-  "%parse-param"         /* FIXME: Remove once %union handled differently.  */ token_type = BRACED_CODE; return PERCENT_PARSE_PARAM;
-  "%prec"                 return PERCENT_PREC;
-  "%printer"              /* FIXME: Remove once %union handled differently.  */ token_type = BRACED_CODE; return PERCENT_PRINTER;
-  "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
-  "%require"              return PERCENT_REQUIRE;
-  "%right"                return PERCENT_RIGHT;
-  "%skeleton"             return PERCENT_SKELETON;
-  "%start"                return PERCENT_START;
-  "%term"                 return PERCENT_TOKEN;
-  "%token"                return PERCENT_TOKEN;
-  "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
-  "%type"                 return PERCENT_TYPE;
-  "%union"               token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
-  "%verbose"              return PERCENT_VERBOSE;
-  "%yacc"                 return PERCENT_YACC;
+  "%initial-action"                return PERCENT_INITIAL_ACTION;
+  "%glr-parser"                    return PERCENT_GLR_PARSER;
+  "%left"                          return PERCENT_LEFT;
+  "%lex-param"                     return PERCENT_LEX_PARAM;
+  "%locations"                     return PERCENT_LOCATIONS;
+  "%merge"                         return PERCENT_MERGE;
+  "%name"[-_]"prefix"              return PERCENT_NAME_PREFIX;
+  "%no"[-_]"default"[-_]"prec"     return PERCENT_NO_DEFAULT_PREC;
+  "%no"[-_]"lines"                 return PERCENT_NO_LINES;
+  "%nonassoc"                      return PERCENT_NONASSOC;
+  "%nondeterministic-parser"               return PERCENT_NONDETERMINISTIC_PARSER;
+  "%nterm"                         return PERCENT_NTERM;
+  "%output"                        return PERCENT_OUTPUT;
+  "%parse-param"                   return PERCENT_PARSE_PARAM;
+  "%prec"                          return PERCENT_PREC;
+  "%printer"                       return PERCENT_PRINTER;
+  "%pure"[-_]"parser"              return PERCENT_PURE_PARSER;
+  "%require"                       return PERCENT_REQUIRE;
+  "%right"                         return PERCENT_RIGHT;
+  "%skeleton"                      return PERCENT_SKELETON;
+  "%start"                         return PERCENT_START;
+  "%term"                          return PERCENT_TOKEN;
+  "%token"                         return PERCENT_TOKEN;
+  "%token"[-_]"table"              return PERCENT_TOKEN_TABLE;
+  "%type"                          return PERCENT_TYPE;
+  "%union"                         return PERCENT_UNION;
+  "%verbose"                       return PERCENT_VERBOSE;
+  "%yacc"                          return PERCENT_YACC;

   {directive} {
     complain_at (*loc, _("invalid directive: %s"), quote (yytext));
@@ -220,7 +214,7 @@
   ";"                     return SEMICOLON;

   {id} {
-    val->symbol = symbol_get (yytext, *loc);
+    val->uniqstr = uniqstr_new (yytext);
     id_loc = *loc;
     BEGIN SC_AFTER_IDENTIFIER;
   }
@@ -248,7 +242,6 @@
     if (current_rule && current_rule->action)
       grammar_midrule_action ();
     STRING_GROW;
-    token_type = BRACED_CODE;
     braces_level = 0;
     code_start = loc->start;
     BEGIN SC_BRACED_CODE;
@@ -372,15 +365,10 @@
     STRING_GROW;
     STRING_FINISH;
     loc->start = token_start;
-    val->symbol = symbol_get (quotearg_style (escape_quoting_style,
-                                             last_string),
-                             *loc);
-    symbol_class_set (val->symbol, token_sym, *loc, false);
-    last_string_1 = last_string[1];
-    symbol_user_token_number_set (val->symbol, last_string_1, *loc);
+    val->character = last_string[1];
     STRING_FREE;
     BEGIN INITIAL;
-    return ID;
+    return CHAR;
   }
   \n           unexpected_newline (token_start, "'");  BEGIN INITIAL;
   <<EOF>>      unexpected_eof (token_start, "'");      BEGIN INITIAL;
@@ -501,45 +489,11 @@
 }


-  /*---------------------------------------------------------------.
-  | Scanning after %union etc., possibly followed by white space.  |
-  | For %union only, allow arbitrary C code to appear before the   |
-  | following brace, as an extension to POSIX.                    |
-  `---------------------------------------------------------------*/
-
-<SC_PRE_CODE>
-{
-  . {
-    bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
-    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
-    yyless (0);
-
-    if (valid)
-      {
-       braces_level = -1;
-       code_start = loc->start;
-       BEGIN SC_BRACED_CODE;
-      }
-    else
-      {
-       complain_at (*loc, _("missing `{' in %s"),
-                    token_name (token_type));
-       obstack_sgrow (&obstack_for_string, "{}");
-       STRING_FINISH;
-       val->chars = last_string;
-       BEGIN INITIAL;
-       return token_type;
-      }
-  }
-
-  <<EOF>>  unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
-}
-

-  /*---------------------------------------------------------------.
-  | Scanning some code in braces (%union and actions). The initial |
-  | "{" is already eaten.                                          |
-  `---------------------------------------------------------------*/
+  /*-----------------------------------------------------------.
+  | Scanning some code in braces (actions). The initial "{" is |
+  | already eaten.                                             |
+  `-----------------------------------------------------------*/

 <SC_BRACED_CODE>
 {
@@ -556,7 +510,7 @@
        val->chars = last_string;
        gram_last_braced_code_loc = *loc;
        BEGIN INITIAL;
-       return token_type;
+       return BRACED_CODE;
       }
   }

Index: data/c.m4
===================================================================
RCS file: /cvsroot/bison/bison/data/c.m4,v
retrieving revision 1.56
diff -u -u -r1.56 c.m4
--- data/c.m4 21 May 2006 04:48:47 -0000 1.56
+++ data/c.m4 19 Jun 2006 16:46:06 -0000
@@ -88,7 +88,8 @@

 m4_define_default([b4_epilogue], [])

-
+# If the %union is not named, its name is YYSTYPE.
+m4_define_default([b4_union_name], [YYSTYPE])

 ## ------------------------ ##
 ## Pure/impure interfaces.  ##
@@ -196,7 +197,7 @@
 m4_define([_b4_define_flag_if],
 [m4_if([$1$2], $[1]$[2], [],
        [m4_fatal([$0: Invalid arguments: $@])])dnl
-m4_define([b4_$3_if],
+m4_define([b4_$3_if],
           [b4_flag_if([$3], [$1], [$2])])])


Index: data/glr.c
===================================================================
RCS file: /cvsroot/bison/bison/data/glr.c,v
retrieving revision 1.178
diff -u -u -r1.178 glr.c
--- data/glr.c 30 May 2006 23:45:04 -0000 1.178
+++ data/glr.c 19 Jun 2006 16:46:06 -0000
@@ -173,7 +173,7 @@

 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 ]m4_ifdef([b4_stype],
-[typedef union m4_bregexp(b4_stype, [^{], [YYSTYPE ])
+[typedef union b4_union_name
 b4_stype
 /* Line __line__ of glr.c.  */
 b4_syncline([@oline@], [@ofile@])
Index: data/yacc.c
===================================================================
RCS file: /cvsroot/bison/bison/data/yacc.c,v
retrieving revision 1.142
diff -u -u -r1.142 yacc.c
--- data/yacc.c 27 May 2006 00:28:17 -0000 1.142
+++ data/yacc.c 19 Jun 2006 16:46:06 -0000
@@ -185,7 +185,7 @@

 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 ]m4_ifdef([b4_stype],
-[typedef union[]m4_bregexp(b4_stype, [^{], [ YYSTYPE])
+[typedef union b4_union_name
 b4_stype
 /* Line __line__ of yacc.c.  */
 b4_syncline([@oline@], [@ofile@])
@@ -1495,7 +1495,7 @@

 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 m4_ifdef([b4_stype],
-[typedef union[]m4_bregexp(b4_stype, [^{], [ YYSTYPE])
+[typedef union b4_union_name
 b4_stype
 /* Line __line__ of yacc.c.  */
 b4_syncline([@oline@], [@ofile@])
[Prev in Thread]
Current Thread
[Next in Thread]
Re: Dubious features, (continued)
Prev by Date: Re: %union ... {
Next by Date: Re: Something is wrong with the translation project
Previous by thread: Re: %union ... {
Next by thread: Re: %union ... {
Index(es):
- Date
- Thread