[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: %union ... {
From: |
Akim Demaille |
Subject: |
Re: %union ... { |
Date: |
Mon, 19 Jun 2006 18:49:38 +0200 |
User-agent: |
Gnus/5.110006 (No Gnus v0.6) Emacs/21.4 (gnu/linux) |
Here is an updated proposal. To be able to use the ID without
triggering its registration as a nonterminal, I had to move the
conversion from string to symbol out of the scanner and into the
parser. I prefer it this way anyway; it is so much more flexible.
I'm sorry for the long, useless portions of the patch caused by
reindentation and sorting.
I'm installing it.
Index: ChangeLog
from Akim Demaille <address@hidden>
* src/scan-gram.l: No longer "parse" things after `%union' until
`{'. Rather, return a single "%union" token.
No longer make symbols: return strings, and leave the conversion
to symbols to the parser.
(SC_PRE_CODE, token_type): Remove.
* src/parse-gram.y (%union): New field `character'.
Sort tokens.
(CHAR): New token.
(ID, ID_COLON): Now that the scanner no longer makes them
identifiers, adjust all uses to invoke symbol_get.
(id_colon): New, wraps the conversion from string to symbol.
(%union): Accept a possible union_name.
(symbol): Now can be a char.
* data/c.m4 (b4_union_name): Leave a default value.
* data/glr.c, data/yacc.c: Use it.
Index: src/parse-gram.y
===================================================================
RCS file: /cvsroot/bison/bison/src/parse-gram.y,v
retrieving revision 1.75
diff -u -u -r1.75 parse-gram.y
--- src/parse-gram.y 6 Jun 2006 16:40:06 -0000 1.75
+++ src/parse-gram.y 19 Jun 2006 16:46:03 -0000
@@ -99,6 +99,7 @@
char *chars;
assoc assoc;
uniqstr uniqstr;
+ unsigned char character;
};
/* Define the tokens together with their human representation. */
@@ -113,8 +114,6 @@
%token PERCENT_DESTRUCTOR "%destructor"
%token PERCENT_PRINTER "%printer"
-%token PERCENT_UNION "%union {...}"
-
%token PERCENT_LEFT "%left"
%token PERCENT_RIGHT "%right"
%token PERCENT_NONASSOC "%nonassoc"
@@ -157,35 +156,37 @@
PERCENT_YACC "%yacc"
;
-%token TYPE "type"
+%token BRACED_CODE "{...}"
+%token CHAR "char"
+%token EPILOGUE "epilogue"
%token EQUAL "="
-%token SEMICOLON ";"
-%token PIPE "|"
%token ID "identifier"
%token ID_COLON "identifier:"
%token PERCENT_PERCENT "%%"
+%token PIPE "|"
%token PROLOGUE "%{...%}"
-%token EPILOGUE "epilogue"
-%token BRACED_CODE "{...}"
+%token SEMICOLON ";"
+%token TYPE "type"
+
+%type <character> CHAR
+%printer { fprintf (stderr, "'%c' (%d)", $$, $$); } CHAR
-%type <chars> STRING string_content
- "{...}"
- "%union {...}"
- PROLOGUE EPILOGUE
-%printer { fprintf (stderr, "\"%s\"", $$); }
- STRING string_content
-%printer { fprintf (stderr, "{\n%s\n}", $$); }
- "{...}"
- "%union {...}"
- PROLOGUE EPILOGUE
-%type <uniqstr> TYPE
+%type <chars> STRING string_content "{...}" PROLOGUE EPILOGUE
+%printer { fprintf (stderr, "\"%s\"", $$); } STRING string_content
+%printer { fprintf (stderr, "{\n%s\n}", $$); } "{...}" PROLOGUE EPILOGUE
+
+%type <uniqstr> TYPE ID ID_COLON
%printer { fprintf (stderr, "<%s>", $$); } TYPE
+%printer { fprintf (stderr, "%s", $$); } ID
+%printer { fprintf (stderr, "%s:", $$); } ID_COLON
+
%type <integer> INT
%printer { fprintf (stderr, "%d", $$); } INT
-%type <symbol> ID symbol string_as_id
-%printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
-%type <symbol> ID_COLON
-%printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
+
+%type <symbol> id id_colon symbol string_as_id
+%printer { fprintf (stderr, "%s", $$->tag); } id symbol string_as_id
+%printer { fprintf (stderr, "%s:", $$->tag); } id_colon
+
%type <assoc> precedence_declarator
%type <list> symbols.1
%%
@@ -252,22 +253,6 @@
{
grammar_start_symbol_set ($2, @2);
}
-| "%union {...}"
- {
- char const *body = $1;
-
- if (typed)
- {
- /* Concatenate the union bodies, turning the first one's
- trailing '}' into '\n', and omitting the second one's '{'. */
- char *code = muscle_find ("stype");
- code[strlen (code) - 1] = '\n';
- body++;
- }
-
- typed = true;
- muscle_code_grow ("stype", body, @1);
- }
| "%destructor" "{...}" symbols.1
{
symbol_list *list;
@@ -294,6 +279,40 @@
}
;
+
+/*----------*
+ | %union. |
+ *----------*/
+
+%token PERCENT_UNION "%union";
+
+union_name:
+ /* Nothing. */ {}
+| ID { muscle_code_grow ("union_name", $1, @1); }
+;
+
+grammar_declaration:
+ "%union" union_name "{...}"
+ {
+ char const *body = $3;
+
+ if (typed)
+ {
+ /* Concatenate the union bodies, turning the first one's
+ trailing '}' into '\n', and omitting the second one's '{'. */
+ char *code = muscle_find ("stype");
+ code[strlen (code) - 1] = '\n';
+ body++;
+ }
+
+ typed = true;
+ muscle_code_grow ("stype", body, @3);
+ }
+;
+
+
+
+
symbol_declaration:
"%nterm" { current_class = nterm_sym; } symbol_defs.1
{
@@ -352,24 +371,24 @@
{
current_type = $1;
}
-| ID
+| id
{
symbol_class_set ($1, current_class, @1, true);
symbol_type_set ($1, current_type, @1);
}
-| ID INT
+| id INT
{
symbol_class_set ($1, current_class, @1, true);
symbol_type_set ($1, current_type, @1);
symbol_user_token_number_set ($1, $2, @2);
}
-| ID string_as_id
+| id string_as_id
{
symbol_class_set ($1, current_class, @1, true);
symbol_type_set ($1, current_type, @1);
symbol_make_alias ($1, $2, @$);
}
-| ID INT string_as_id
+| id INT string_as_id
{
symbol_class_set ($1, current_class, @1, true);
symbol_type_set ($1, current_type, @1);
@@ -406,7 +425,7 @@
;
rules:
- ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
+ id_colon { current_lhs = $1; current_lhs_location = @1; } rhses.1
;
rhses.1:
@@ -431,9 +450,32 @@
{ grammar_current_rule_merge_set ($3, @3); }
;
+
+/*---------------*
+ | Identifiers. |
+ *---------------*/
+
+/* Identifiers are returned as uniqstr by the scanner. Depending on
+ their use, we may need to make them genuine symbols. */
+
+id:
+ ID { $$ = symbol_get ($1, @1); }
+| CHAR { char cp[4] = { '\'', $1, '\'', 0 };
+ $$ = symbol_get (quotearg_style (escape_quoting_style, cp),
+ @1);
+ symbol_class_set ($$, token_sym, @1, false);
+ symbol_user_token_number_set ($$, $1, @1);
+ }
+;
+
+id_colon:
+ ID_COLON { $$ = symbol_get ($1, @1); }
+;
+
+
symbol:
- ID { $$ = $1; }
-| string_as_id { $$ = $1; }
+ id
+| string_as_id
;
/* A string used as an ID: quote it. */
Index: src/scan-gram.l
===================================================================
RCS file: /cvsroot/bison/bison/src/scan-gram.l,v
retrieving revision 1.90
diff -u -u -r1.90 scan-gram.l
--- src/scan-gram.l 7 Jun 2006 21:17:35 -0000 1.90
+++ src/scan-gram.l 19 Jun 2006 16:46:03 -0000
@@ -88,9 +88,6 @@
/* A identifier was just read in directives/rules. Special state
to capture the sequence `identifier :'. */
%x SC_AFTER_IDENTIFIER
- /* A keyword that should be followed by some code was read (e.g.
- %printer). */
-%x SC_PRE_CODE
/* Three types of user code:
- prologue (code between `%{' `%}' in the first section, before %%);
@@ -124,9 +121,6 @@
/* Parent context state, when applicable. */
int context_state IF_LINT (= 0);
- /* Token type to return, when applicable. */
- int token_type IF_LINT (= 0);
-
/* Location of most recent identifier, when applicable. */
location id_loc IF_LINT (= empty_location);
@@ -145,7 +139,7 @@
| Scanning white space. |
`-----------------------*/
-<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
+<INITIAL,SC_AFTER_IDENTIFIER>
{
/* Comments and white space. */
"," warn_at (*loc, _("stray `,' treated as white space"));
@@ -170,46 +164,46 @@
`----------------------------*/
<INITIAL>
{
- "%binary" return PERCENT_NONASSOC;
- "%debug" return PERCENT_DEBUG;
- "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
- "%define" return PERCENT_DEFINE;
- "%defines" return PERCENT_DEFINES;
- "%destructor" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_DESTRUCTOR;
- "%dprec" return PERCENT_DPREC;
- "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
- "%expect" return PERCENT_EXPECT;
- "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
- "%file-prefix" return PERCENT_FILE_PREFIX;
+ "%binary" return PERCENT_NONASSOC;
+ "%debug" return PERCENT_DEBUG;
+ "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
+ "%define" return PERCENT_DEFINE;
+ "%defines" return PERCENT_DEFINES;
+ "%destructor" return PERCENT_DESTRUCTOR;
+ "%dprec" return PERCENT_DPREC;
+ "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
+ "%expect" return PERCENT_EXPECT;
+ "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
+ "%file-prefix" return PERCENT_FILE_PREFIX;
"%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
- "%initial-action" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_INITIAL_ACTION;
- "%glr-parser" return PERCENT_GLR_PARSER;
- "%left" return PERCENT_LEFT;
- "%lex-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_LEX_PARAM;
- "%locations" return PERCENT_LOCATIONS;
- "%merge" return PERCENT_MERGE;
- "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
- "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
- "%no"[-_]"lines" return PERCENT_NO_LINES;
- "%nonassoc" return PERCENT_NONASSOC;
- "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
- "%nterm" return PERCENT_NTERM;
- "%output" return PERCENT_OUTPUT;
- "%parse-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_PARSE_PARAM;
- "%prec" return PERCENT_PREC;
- "%printer" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_PRINTER;
- "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
- "%require" return PERCENT_REQUIRE;
- "%right" return PERCENT_RIGHT;
- "%skeleton" return PERCENT_SKELETON;
- "%start" return PERCENT_START;
- "%term" return PERCENT_TOKEN;
- "%token" return PERCENT_TOKEN;
- "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
- "%type" return PERCENT_TYPE;
- "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
- "%verbose" return PERCENT_VERBOSE;
- "%yacc" return PERCENT_YACC;
+ "%initial-action" return PERCENT_INITIAL_ACTION;
+ "%glr-parser" return PERCENT_GLR_PARSER;
+ "%left" return PERCENT_LEFT;
+ "%lex-param" return PERCENT_LEX_PARAM;
+ "%locations" return PERCENT_LOCATIONS;
+ "%merge" return PERCENT_MERGE;
+ "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
+ "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
+ "%no"[-_]"lines" return PERCENT_NO_LINES;
+ "%nonassoc" return PERCENT_NONASSOC;
+ "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
+ "%nterm" return PERCENT_NTERM;
+ "%output" return PERCENT_OUTPUT;
+ "%parse-param" return PERCENT_PARSE_PARAM;
+ "%prec" return PERCENT_PREC;
+ "%printer" return PERCENT_PRINTER;
+ "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
+ "%require" return PERCENT_REQUIRE;
+ "%right" return PERCENT_RIGHT;
+ "%skeleton" return PERCENT_SKELETON;
+ "%start" return PERCENT_START;
+ "%term" return PERCENT_TOKEN;
+ "%token" return PERCENT_TOKEN;
+ "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
+ "%type" return PERCENT_TYPE;
+ "%union" return PERCENT_UNION;
+ "%verbose" return PERCENT_VERBOSE;
+ "%yacc" return PERCENT_YACC;
{directive} {
complain_at (*loc, _("invalid directive: %s"), quote (yytext));
@@ -220,7 +214,7 @@
";" return SEMICOLON;
{id} {
- val->symbol = symbol_get (yytext, *loc);
+ val->uniqstr = uniqstr_new (yytext);
id_loc = *loc;
BEGIN SC_AFTER_IDENTIFIER;
}
@@ -248,7 +242,6 @@
if (current_rule && current_rule->action)
grammar_midrule_action ();
STRING_GROW;
- token_type = BRACED_CODE;
braces_level = 0;
code_start = loc->start;
BEGIN SC_BRACED_CODE;
@@ -372,15 +365,10 @@
STRING_GROW;
STRING_FINISH;
loc->start = token_start;
- val->symbol = symbol_get (quotearg_style (escape_quoting_style,
- last_string),
- *loc);
- symbol_class_set (val->symbol, token_sym, *loc, false);
- last_string_1 = last_string[1];
- symbol_user_token_number_set (val->symbol, last_string_1, *loc);
+ val->character = last_string[1];
STRING_FREE;
BEGIN INITIAL;
- return ID;
+ return CHAR;
}
\n unexpected_newline (token_start, "'"); BEGIN INITIAL;
<<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
@@ -501,45 +489,11 @@
}
- /*---------------------------------------------------------------.
- | Scanning after %union etc., possibly followed by white space. |
- | For %union only, allow arbitrary C code to appear before the |
- | following brace, as an extension to POSIX. |
- `---------------------------------------------------------------*/
-
-<SC_PRE_CODE>
-{
- . {
- bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
- scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
- yyless (0);
-
- if (valid)
- {
- braces_level = -1;
- code_start = loc->start;
- BEGIN SC_BRACED_CODE;
- }
- else
- {
- complain_at (*loc, _("missing `{' in %s"),
- token_name (token_type));
- obstack_sgrow (&obstack_for_string, "{}");
- STRING_FINISH;
- val->chars = last_string;
- BEGIN INITIAL;
- return token_type;
- }
- }
-
- <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
-}
-
- /*---------------------------------------------------------------.
- | Scanning some code in braces (%union and actions). The initial |
- | "{" is already eaten. |
- `---------------------------------------------------------------*/
+ /*-----------------------------------------------------------.
+ | Scanning some code in braces (actions). The initial "{" is |
+ | already eaten. |
+ `-----------------------------------------------------------*/
<SC_BRACED_CODE>
{
@@ -556,7 +510,7 @@
val->chars = last_string;
gram_last_braced_code_loc = *loc;
BEGIN INITIAL;
- return token_type;
+ return BRACED_CODE;
}
}
Index: data/c.m4
===================================================================
RCS file: /cvsroot/bison/bison/data/c.m4,v
retrieving revision 1.56
diff -u -u -r1.56 c.m4
--- data/c.m4 21 May 2006 04:48:47 -0000 1.56
+++ data/c.m4 19 Jun 2006 16:46:06 -0000
@@ -88,7 +88,8 @@
m4_define_default([b4_epilogue], [])
-
+# If the %union is not named, its name is YYSTYPE.
+m4_define_default([b4_union_name], [YYSTYPE])
## ------------------------ ##
## Pure/impure interfaces. ##
@@ -196,7 +197,7 @@
m4_define([_b4_define_flag_if],
[m4_if([$1$2], $[1]$[2], [],
[m4_fatal([$0: Invalid arguments: $@])])dnl
-m4_define([b4_$3_if],
+m4_define([b4_$3_if],
[b4_flag_if([$3], [$1], [$2])])])
Index: data/glr.c
===================================================================
RCS file: /cvsroot/bison/bison/data/glr.c,v
retrieving revision 1.178
diff -u -u -r1.178 glr.c
--- data/glr.c 30 May 2006 23:45:04 -0000 1.178
+++ data/glr.c 19 Jun 2006 16:46:06 -0000
@@ -173,7 +173,7 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
]m4_ifdef([b4_stype],
-[typedef union m4_bregexp(b4_stype, [^{], [YYSTYPE ])
+[typedef union b4_union_name
b4_stype
/* Line __line__ of glr.c. */
b4_syncline([@oline@], [@ofile@])
Index: data/yacc.c
===================================================================
RCS file: /cvsroot/bison/bison/data/yacc.c,v
retrieving revision 1.142
diff -u -u -r1.142 yacc.c
--- data/yacc.c 27 May 2006 00:28:17 -0000 1.142
+++ data/yacc.c 19 Jun 2006 16:46:06 -0000
@@ -185,7 +185,7 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
]m4_ifdef([b4_stype],
-[typedef union[]m4_bregexp(b4_stype, [^{], [ YYSTYPE])
+[typedef union b4_union_name
b4_stype
/* Line __line__ of yacc.c. */
b4_syncline([@oline@], [@ofile@])
@@ -1495,7 +1495,7 @@
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
m4_ifdef([b4_stype],
-[typedef union[]m4_bregexp(b4_stype, [^{], [ YYSTYPE])
+[typedef union b4_union_name
b4_stype
/* Line __line__ of yacc.c. */
b4_syncline([@oline@], [@ofile@])
- Re: Dubious features, (continued)
- Re: Dubious features, Paul Eggert, 2006/06/08
- Re: Dubious features, Akim Demaille, 2006/06/08
- Re: Dubious features, Paul Eggert, 2006/06/08
- Re: Dubious features, Akim Demaille, 2006/06/10
- Re: Dubious features, Paul Eggert, 2006/06/11
- Re: %union ... {, Akim Demaille, 2006/06/16
- Re: %union ... {, Paul Eggert, 2006/06/16
- Re: %union ... {, Akim Demaille, 2006/06/17
- Re: %union ... {, Paul Eggert, 2006/06/19
- Re: %union ... {, Hans Aberg, 2006/06/19
- Re: %union ... {,
Akim Demaille <=
- Re: %union ... {, Paul Eggert, 2006/06/19
- Re: %union ... {, Akim Demaille, 2006/06/20
- Problems with CVS, Akim Demaille, 2006/06/20
- Re: Problems with CVS, Paul Eggert, 2006/06/20
- Re: %union ... {, Joel E. Denny, 2006/06/19
- Re: %union ... {, Akim Demaille, 2006/06/20
- Re: %union ... {, Hans Aberg, 2006/06/20