RFC: dogfooding: use api.value.type union

bison-patches
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
RFC: dogfooding: use api.value.type union

From:	Akim Demaille
Subject:	RFC: dogfooding: use api.value.type union
Date:	Sat, 10 Nov 2018 16:15:20 +0100
I do believe that this is a better way to specify the semantic
types.  I never liked %union; in C++ in particular it has shown
its limitations.

On the scanner side it is not too ugly to replace

    val->uniqstr = uniqstr_new (last_string);
    return TAG;

    val->code = last_string;
    return PROLOGUE;

with

    val->TAG = uniqstr_new (last_string);
    return TAG;

    val->PROLOGUE = last_string;
    return PROLOGUE;

even if it’s unconventional to see such member names in unions.  IMHO,
it’s actually much clearer to have that consistency between the name
of the token, and that of the type.

I would also like to move from

  {int} {
    val->INT = scan_integer (yytext, 10, *loc);
    return INT;
  }

to

  {int}   RETURN_VALUE (INT, scan_integer (yytext, 10, *loc));

with the right macro RETURN_VALUE.

Eventually, it would be nice to also have token constructors in C
and be able to write:

    return make_TAG (uniqstr_new (last_string));

However, I would happily read comments and opinions about this move.



commit 7a8fd85df1b1f11956fd851ccac3a9cf5e9bc8f1
Author: Akim Demaille <address@hidden>
Date:   Sat Nov 10 15:48:39 2018 +0100

    dogfooding: use api.value.type union
    
    * src/parse-gram.y (api.value.type): Set to union.
    Replace occurrences of %union with explicit %types.
    * src/scan-gram.l: Adjust yylval's field names.
    (RETURN_VALUE): No longer needs the Field argument.
    Use it more.

diff --git a/src/parse-gram.y b/src/parse-gram.y
index 9c84853f..74014826 100644
--- a/src/parse-gram.y
+++ b/src/parse-gram.y
@@ -97,6 +97,7 @@
 
 %define api.prefix {gram_}
 %define api.pure full
+%define api.value.type union
 %define locations
 %define parse.error verbose
 %define parse.lac full
@@ -180,16 +181,13 @@
 %token TAG_ANY         "<*>"
 %token TAG_NONE        "<>"
 
-%union {unsigned char character;}
-%type <character> CHAR
-%printer { fputs (char_name ($$), yyo); } CHAR
+%type <unsigned char> CHAR
+%printer { fputs (char_name ($$), yyo); } <unsigned char>
 
-%union {char *code;};
-%type <code> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING
+%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING
 %printer { fputs (quotearg_style (c_quoting_style, $$), yyo); } STRING
-%printer { fprintf (yyo, "{\n%s\n}", $$); } <code>
+%printer { fprintf (yyo, "{\n%s\n}", $$); } <char*>
 
-%union {uniqstr uniqstr;}
 %type <uniqstr> BRACKETED_ID ID ID_COLON PERCENT_FLAG TAG tag variable
 %printer { fputs ($$, yyo); } <uniqstr>
 %printer { fprintf (yyo, "[%s]", $$); } BRACKETED_ID
@@ -197,23 +195,18 @@
 %printer { fprintf (yyo, "%%%s", $$); } PERCENT_FLAG
 %printer { fprintf (yyo, "<%s>", $$); } TAG tag
 
-%union {int integer;};
-%token <integer> INT "integer"
-%printer { fprintf (yyo, "%d", $$); } <integer>
+%token <int> INT "integer"
+%printer { fprintf (yyo, "%d", $$); } <int>
 
-%union {symbol *symbol;}
-%type <symbol> id id_colon string_as_id symbol symbol.prec
-%printer { fprintf (yyo, "%s", $$->tag); } <symbol>
+%type <symbol*> id id_colon string_as_id symbol symbol.prec
+%printer { fprintf (yyo, "%s", $$->tag); } <symbol*>
 %printer { fprintf (yyo, "%s:", $$->tag); } id_colon
 
-%union {assoc assoc;};
 %type <assoc> precedence_declarator
 
-%union {symbol_list *list;}
-%type <list>  symbols.1 symbols.prec generic_symlist generic_symlist_item
+%type <symbol_list*>  symbols.1 symbols.prec generic_symlist generic_symlist_item
 
-%union {named_ref *named_ref;}
-%type <named_ref> named_ref.opt
+%type <named_ref*> named_ref.opt
 
 /*---------.
 | %param.  |
@@ -239,8 +232,7 @@
   static void add_param (param_type type, char *decl, location loc);
   static param_type current_param = param_none;
 };
-%union {param_type param;}
-%token <param> PERCENT_PARAM "%param";
+%token <param_type> PERCENT_PARAM "%param";
 %printer
 {
   switch ($$)
@@ -253,7 +245,7 @@
 #undef CASE
       case param_none: aver (false); break;
     }
-} <param>;
+} <param_type>;
 
 
                      /*==========\
@@ -405,9 +397,8 @@ grammar_declaration:
     }
 ;
 
-%type <code_type> code_props_type;
-%union {code_props_type code_type;};
-%printer { fprintf (yyo, "%s", code_props_type_string ($$)); } <code_type>;
+%type <code_props_type> code_props_type;
+%printer { fprintf (yyo, "%s", code_props_type_string ($$)); } <code_props_type>;
 code_props_type:
   "%destructor"  { $$ = destructor; }
 | "%printer"     { $$ = printer; }
@@ -642,16 +633,15 @@ variable:
 ;
 
 /* Some content or empty by default. */
-%code requires {#include "muscle-tab.h"};
-%union
-{
-  struct
+%code requires {
+  #include "muscle-tab.h"
+  typedef struct
   {
     char const *chars;
     muscle_kind kind;
-  } value;
+  } value_type;
 };
-%type <value> value;
+%type <value_type> value;
 %printer
 {
   switch ($$.kind)
@@ -660,7 +650,7 @@ variable:
     case muscle_keyword: fprintf (yyo,   "%s",   $$.chars); break;
     case muscle_string:  fprintf (yyo, "\"%s\"", $$.chars); break;
     }
-} <value>;
+} <value_type>;
 
 value:
   %empty  { $$.kind = muscle_keyword; $$.chars = ""; }
diff --git a/src/scan-gram.l b/src/scan-gram.l
index f508edca..da6398be 100644
--- a/src/scan-gram.l
+++ b/src/scan-gram.l
@@ -53,15 +53,15 @@ static boundary scanner_cursor;
 static size_t no_cr_read (FILE *, char *, size_t);
 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
 
-#define RETURN_PERCENT_PARAM(Value)                     \
-  RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
+#define RETURN_PERCENT_PARAM(Value)             \
+  RETURN_VALUE(PERCENT_PARAM, param_ ## Value)
 
-#define RETURN_PERCENT_FLAG(Value)                              \
-  RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
+#define RETURN_PERCENT_FLAG(Value)                      \
+  RETURN_VALUE(PERCENT_FLAG, uniqstr_new (Value))
 
-#define RETURN_VALUE(Token, Field, Value)       \
+#define RETURN_VALUE(Token, Value)              \
   do {                                          \
-    val->Field = Value;                         \
+    val->Token = Value;                         \
     return Token;                               \
   } while (0)
 
@@ -134,6 +134,7 @@ letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
 notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
 id        {letter}({letter}|[-0-9])*
 int       [0-9]+
+xint      0[xX][0-9abcdefABCDEF]+
 
 /* Zero or more instances of backslash-newline.  Following GCC, allow
    white space between the backslash and the newline.  */
@@ -281,20 +282,14 @@ eqopt    ([[:space:]]*=)?
   ";"                     return SEMICOLON;
 
   {id} {
-    val->uniqstr = uniqstr_new (yytext);
+    val->ID = uniqstr_new (yytext);
     id_loc = *loc;
     bracketed_id_str = NULL;
     BEGIN SC_AFTER_IDENTIFIER;
   }
 
-  {int} {
-    val->integer = scan_integer (yytext, 10, *loc);
-    return INT;
-  }
-  0[xX][0-9abcdefABCDEF]+ {
-    val->integer = scan_integer (yytext, 16, *loc);
-    return INT;
-  }
+  {int}      RETURN_VALUE (INT, scan_integer (yytext, 10, *loc));
+  {xint}     RETURN_VALUE (INT, scan_integer (yytext, 16, *loc));
 
   /* Identifiers may not start with a digit.  Yet, don't silently
      accept "1FOO" as "1 FOO".  */
@@ -437,7 +432,7 @@ eqopt    ([[:space:]]*=)?
       {
         if (INITIAL == bracketed_id_context_state)
           {
-            val->uniqstr = bracketed_id_str;
+            val->BRACKETED_ID = bracketed_id_str;
             bracketed_id_str = 0;
             *loc = bracketed_id_loc;
             return BRACKETED_ID;
@@ -464,7 +459,7 @@ eqopt    ([[:space:]]*=)?
 {
   . {
     ROLLBACK_CURRENT_TOKEN;
-    val->uniqstr = bracketed_id_str;
+    val->BRACKETED_ID = bracketed_id_str;
     bracketed_id_str = 0;
     *loc = bracketed_id_loc;
     BEGIN INITIAL;
@@ -517,10 +512,9 @@ eqopt    ([[:space:]]*=)?
 {
   "\"" {
     STRING_FINISH;
-    loc->start = token_start;
-    val->code = last_string;
     BEGIN INITIAL;
-    return STRING;
+    loc->start = token_start;
+    RETURN_VALUE (STRING, last_string);
   }
   <<EOF>>   unexpected_eof (token_start, "\"");
   "\n"      unexpected_newline (token_start, "\"");
@@ -536,14 +530,14 @@ eqopt    ([[:space:]]*=)?
   "'" {
     STRING_FINISH;
     loc->start = token_start;
-    val->character = last_string[0];
+    val->CHAR = last_string[0];
 
     /* FIXME: Eventually, make these errors.  */
     if (last_string[0] == '\0')
     {
       complain (loc, Wother, _("empty character literal"));
       /* '\0' seems dangerous even if we are about to complain.  */
-      val->character = '\'';
+      val->CHAR = '\'';
     }
     else if (last_string[1] != '\0')
       complain (loc, Wother,
@@ -570,7 +564,7 @@ eqopt    ([[:space:]]*=)?
       {
         STRING_FINISH;
         loc->start = token_start;
-        val->uniqstr = uniqstr_new (last_string);
+        val->TAG = uniqstr_new (last_string);
         STRING_FREE;
         BEGIN INITIAL;
         return TAG;
@@ -725,9 +719,8 @@ eqopt    ([[:space:]]*=)?
       {
         STRING_FINISH;
         loc->start = code_start;
-        val->code = last_string;
         BEGIN INITIAL;
-        return BRACED_CODE;
+        RETURN_VALUE (BRACED_CODE, last_string);
       }
   }
 }
@@ -740,9 +733,8 @@ eqopt    ([[:space:]]*=)?
       {
         STRING_FINISH;
         loc->start = code_start;
-        val->code = last_string;
         BEGIN INITIAL;
-        return BRACED_PREDICATE;
+        RETURN_VALUE (BRACED_PREDICATE, last_string);
       }
     else
       obstack_1grow (&obstack_for_string, '}');
@@ -758,9 +750,8 @@ eqopt    ([[:space:]]*=)?
   "%}" {
     STRING_FINISH;
     loc->start = code_start;
-    val->code = last_string;
     BEGIN INITIAL;
-    return PROLOGUE;
+    RETURN_VALUE (PROLOGUE, last_string);
   }
 
   <<EOF>>   unexpected_eof (code_start, "%}");
@@ -777,9 +768,8 @@ eqopt    ([[:space:]]*=)?
   <<EOF>> {
     STRING_FINISH;
     loc->start = code_start;
-    val->code = last_string;
     BEGIN INITIAL;
-    return EPILOGUE;
+    RETURN_VALUE (EPILOGUE, last_string);
   }
 }
[Prev in Thread]
Current Thread
[Next in Thread]
RFC: dogfooding: use api.value.type union, Akim Demaille <=
- Re: RFC: dogfooding: use api.value.type union, Paul Eggert, 2018/11/10
  - Re: RFC: dogfooding: use api.value.type union, Akim Demaille, 2018/11/11
    - Re: RFC: dogfooding: use api.value.type union, Paul Eggert, 2018/11/11
Prev by Date: style: clean up the scanner and parser
Next by Date: reader: no longer accept %define variable names in quotes
Previous by thread: style: clean up the scanner and parser
Next by thread: Re: RFC: dogfooding: use api.value.type union
Index(es):
- Date
- Thread