[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 5/6] lalr1.cc: add LAC support
From: |
Akim Demaille |
Subject: |
[PATCH 5/6] lalr1.cc: add LAC support |
Date: |
Fri, 9 Aug 2019 06:47:01 -0500 |
From: Adrian Vogelsgesang <address@hidden>
Implement lookahead correction (LAC) for the C++ skeleton. LAC is a
mechanism to make sure that we report the correct list of expected
tokens if a syntax error occurs. So far, LAC was only supported for
the C skeleton "yacc.c".
* data/skeletons/lalr1.cc: Add LAC support.
* doc/bison.texi: Update.
---
data/skeletons/lalr1.cc | 231 +++++++++++++++++++++++++++++++++++++---
doc/bison.texi | 4 +-
2 files changed, 216 insertions(+), 19 deletions(-)
diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc
index ddee9569..37c12f7d 100644
--- a/data/skeletons/lalr1.cc
+++ b/data/skeletons/lalr1.cc
@@ -20,6 +20,14 @@ m4_include(b4_skeletonsdir/[c++.m4])
# api.value.type=variant is valid.
m4_define([b4_value_type_setup_variant])
+# Check the value of %define parse.lac, where LAC stands for lookahead
+# correction.
+b4_percent_define_default([[parse.lac]], [[none]])
+b4_define_flag_if([lac])
+m4_define([b4_lac_flag],
+ [m4_if(b4_percent_define_get([[parse.lac]]),
+ [none], [[0]], [[1]])])
+
# b4_integral_parser_table_declare(TABLE-NAME, CONTENT, COMMENT)
# --------------------------------------------------------------
@@ -220,7 +228,18 @@ m4_define([b4_shared_declarations],
private:
/// This class is not copyable.
]b4_parser_class[ (const ]b4_parser_class[&);
- ]b4_parser_class[& operator= (const ]b4_parser_class[&);
+ ]b4_parser_class[& operator= (const ]b4_parser_class[&);]b4_lac_if([[
+
+ /// Check the lookahead yytoken.
+ /// \returns true iff the token will be eventually shifted.
+ bool yy_lac_check_ (int yytoken) const;
+ /// Establish the initial context if no initial context currently exists.
+ /// \returns true iff the token will be eventually shifted.
+ bool yy_lac_establish_ (int yytoken);
+ /// Discard any previous initial lookahead context because of event.
+ /// \param event the event which caused the lookahead to be discarded.
+ /// Only used for debbuging output.
+ void yy_lac_discard_ (const char* event);]])[
/// State numbers.
typedef int state_type;
@@ -344,7 +363,16 @@ m4_define([b4_shared_declarations],
typedef stack<stack_symbol_type> stack_type;
/// The stack.
- stack_type yystack_;
+ stack_type yystack_;]b4_lac_if([[
+ /// The stack for LAC.
+ /// Logically, the yy_lac_stack's lifetime is confined to the function
+ /// yy_lac_check_. We just store it as a member of this class to hold
+ /// on to the memory and to avoid frequent reallocations.
+ /// Since yy_lac_check_ is const, this member must be mutable.
+ mutable std::vector<state_type> yylac_stack_;
+ /// Whether an initial LAC context was established.
+ bool yy_lac_established_;
+]])[
/// Push a new state on the stack.
/// \param m a debug message to display
@@ -774,7 +802,11 @@ m4_if(b4_prefix, [yy], [],
stack_symbol_type yyerror_range[3];]])[
/// The return value of parse ().
- int yyresult;
+ int yyresult;]b4_lac_if([[
+
+ /// Discard the LAC context in case there still is one left from a
+ /// previous invocation.
+ yy_lac_discard_ ("init");]])[
#if YY_EXCEPTIONS
try
@@ -843,14 +875,21 @@ b4_dollar_popdef])[]dnl
to detect an error, take that action. */
yyn += yyla.type_get ();
if (yyn < 0 || yylast_ < yyn || yycheck_[yyn] != yyla.type_get ())
- goto yydefault;
+ {]b4_lac_if([[
+ if (!yy_lac_establish_ (yyla.type_get ()))
+ goto yyerrlab;]])[
+ goto yydefault;
+ }
// Reduce or error.
yyn = yytable_[yyn];
if (yyn <= 0)
{
if (yy_table_value_is_error_ (yyn))
- goto yyerrlab;
+ goto yyerrlab;]b4_lac_if([[
+ if (!yy_lac_establish_ (yyla.type_get ()))
+ goto yyerrlab;
+]])[
yyn = -yyn;
goto yyreduce;
}
@@ -860,7 +899,8 @@ b4_dollar_popdef])[]dnl
--yyerrstatus_;
// Shift the lookahead token.
- yypush_ ("Shifting", yyn, YY_MOVE (yyla));
+ yypush_ ("Shifting", yyn, YY_MOVE (yyla));]b4_lac_if([[
+ yy_lac_discard_ ("shift");]])[
goto yynewstate;
@@ -1020,7 +1060,8 @@ b4_dollar_popdef])[]dnl
yyerror_range[2].location = yyla.location;
YYLLOC_DEFAULT (error_token.location, yyerror_range, 2);]])[
- // Shift the error token.
+ // Shift the error token.]b4_lac_if([[
+ yy_lac_discard_ ("error recovery");]])[
error_token.state = yyn;
yypush_ ("Shifting", YY_MOVE (error_token));
}
@@ -1085,8 +1126,148 @@ b4_dollar_popdef])[]dnl
{
error (]b4_join(b4_locations_if([yyexc.location]),
[[yyexc.what ()]])[);
+ }]b4_lac_if([[
+
+ bool
+ ]b4_parser_class[::yy_lac_check_ (int yytoken) const
+ {
+ // Logically, the yylac_stack's lifetime is confined to this function.
+ // Clear it, to get rid of potential left-overs from previous call.
+ yylac_stack_.clear ();
+ // Reduce until we encounter a shift and thereby accept the token.
+#if ]b4_api_PREFIX[DEBUG
+ YYCDEBUG << "LAC: checking lookahead " << yytname_[yytoken] << ':';
+#endif
+ size_t lac_top = 0;
+ while (true)
+ {
+ state_type top_state = (yylac_stack_.empty ()
+ ? yystack_[lac_top].state
+ : yylac_stack_.back ());
+ int yyrule = yypact_[top_state];
+ if (yy_pact_value_is_default_ (yyrule)
+ || (yyrule += yytoken) < 0 || yylast_ < yyrule
+ || yycheck_[yyrule] != yytoken)
+ {
+ // Use the default action.
+ yyrule = yydefact_[top_state];
+ if (yyrule == 0)
+ {
+ YYCDEBUG << " Err\n";
+ return false;
+ }
+ }
+ else
+ {
+ // Use the action from yytable.
+ yyrule = yytable_[yyrule];
+ if (yy_table_value_is_error_ (yyrule))
+ {
+ YYCDEBUG << " Err\n";
+ return false;
+ }
+ if (0 < yyrule)
+ {
+ YYCDEBUG << " S" << yyrule << '\n';
+ return true;
+ }
+ yyrule = -yyrule;
+ }
+ // By now we know we have to simulate a reduce.
+ YYCDEBUG << " R" << yyrule - 1;
+ // Pop the corresponding number of values from the stack.
+ {
+ size_t yylen = yyr2_[yyrule];
+ // First pop from the LAC stack as many tokens as possible.
+ size_t lac_size = yylac_stack_.size ();
+ if (yylen < lac_size)
+ {
+ for (size_t i = 0; i < yylen; ++i)
+ yylac_stack_.pop_back ();
+ yylen = 0;
+ }
+ else if (lac_size)
+ {
+ yylac_stack_.clear ();
+ yylen -= lac_size;
+ }
+ // Only aftwerwards look at the main stack.
+ // We simulate popping elements by incrementing lac_top.
+ lac_top += yylen;
+ }
+ // Keep top_state in sync with the updated stack.
+ top_state = (yylac_stack_.empty ()
+ ? yystack_[lac_top].state
+ : yylac_stack_.back ());
+ // Push the resulting state of the reduction.
+ state_type state = yy_lr_goto_state_ (top_state, yyr1_[yyrule]);
+ YYCDEBUG << " G" << state;
+ yylac_stack_.push_back (state);
+ }
+ }
+
+ // Establish the initial context if no initial context currently exists.
+ bool
+ ]b4_parser_class[::yy_lac_establish_ (int yytoken)
+ {
+ /* Establish the initial context for the current lookahead if no initial
+ context is currently established.
+
+ We define a context as a snapshot of the parser stacks. We define
+ the initial context for a lookahead as the context in which the
+ parser initially examines that lookahead in order to select a
+ syntactic action. Thus, if the lookahead eventually proves
+ syntactically unacceptable (possibly in a later context reached via a
+ series of reductions), the initial context can be used to determine
+ the exact set of tokens that would be syntactically acceptable in the
+ lookahead's place. Moreover, it is the context after which any
+ further semantic actions would be erroneous because they would be
+ determined by a syntactically unacceptable token.
+
+ yy_lac_establish_ should be invoked when a reduction is about to be
+ performed in an inconsistent state (which, for the purposes of LAC,
+ includes consistent states that don't know they're consistent because
+ their default reductions have been disabled).
+
+ For parse.lac=full, the implementation of yy_lac_establish_ is as
+ follows. If no initial context is currently established for the
+ current lookahead, then check if that lookahead can eventually be
+ shifted if syntactic actions continue from the current context. */
+ if (!yy_lac_established_)
+ {
+#if ]b4_api_PREFIX[DEBUG
+ YYCDEBUG << "LAC: initial context established for "
+ << yytname_[yytoken] << '\n';
+#endif
+ yy_lac_established_ = true;
+ return yy_lac_check_ (yytoken);
+ }
+ return true;
}
+ // Discard any previous initial lookahead context.
+ void
+ ]b4_parser_class[::yy_lac_discard_ (const char* evt)
+ {
+ /* Discard any previous initial lookahead context because of Event,
+ which may be a lookahead change or an invalidation of the currently
+ established initial context for the current lookahead.
+
+ The most common example of a lookahead change is a shift. An example
+ of both cases is syntax error recovery. That is, a syntax error
+ occurs when the lookahead is syntactically erroneous for the
+ currently established initial context, so error recovery manipulates
+ the parser stacks to try to find a new initial context in which the
+ current lookahead is syntactically acceptable. If it fails to find
+ such a context, it discards the lookahead. */
+ if (yy_lac_established_)
+ {
+ YYCDEBUG << "LAC: initial context discarded due to "
+ << evt << '\n';
+ yy_lac_established_ = false;
+ }
+ }]])[
+
// Generate an error message.
std::string
]b4_parser_class[::yysyntax_error_ (]dnl
@@ -1115,24 +1296,40 @@ b4_error_verbose_if([state_type yystate, const
symbol_type& yyla],
a consistent state with a default action. There might have
been a previous inconsistent state, consistent state with a
non-default action, or user semantic action that manipulated
- yyla. (However, yyla is currently not documented for users.)
+ yyla. (However, yyla is currently not documented for
users.)]b4_lac_if([[
+ In the first two cases, it might appear that the current syntax
+ error should have been detected in the previous state when
+ yy_lac_check was invoked. However, at that time, there might
+ have been a different syntax error that discarded a different
+ initial context during error recovery, leaving behind the
+ current lookahead.]], [[
- Of course, the expected token list depends on states to have
correct lookahead information, and it depends on the parser not
to perform extra reductions after fetching a lookahead from the
- scanner and before detecting a syntax error. Thus, state
- merging (from LALR or IELR) and default reductions corrupt the
- expected token list. However, the list is correct for
- canonical LR with one exception: it will still contain any
- token that will not be accepted due to an error action in a
- later state.
+ scanner and before detecting a syntax error. Thus, state merging
+ (from LALR or IELR) and default reductions corrupt the expected
+ token list. However, the list is correct for canonical LR with
+ one exception: it will still contain any token that will not be
+ accepted due to an error action in a later state.]])[
*/
if (!yyla.empty ())
{
int yytoken = yyla.type_get ();
- yyarg[yycount++] = yytname_[yytoken];
+ yyarg[yycount++] = yytname_[yytoken];]b4_lac_if([[
+
+#if ]b4_api_PREFIX[DEBUG
+ // Execute LAC once. We don't care if it is succesful, we
+ // only do it for the sake of debugging output.
+ if (!yy_lac_established_)
+ yy_lac_check_ (yytoken);
+#endif]])[
+
int yyn = yypact_[yystate];
if (!yy_pact_value_is_default_ (yyn))
- {
+ {]b4_lac_if([[
+ for (int yyx = 0; yyx < yyntokens_; ++yyx)
+ if (yyx != yyterror_ && yy_lac_check_(yyx))
+ {]], [[
/* Start YYX at -YYN if negative to avoid negative indexes in
YYCHECK. In other words, skip the first -YYN actions for
this state because they are default actions. */
@@ -1143,7 +1340,7 @@ b4_error_verbose_if([state_type yystate, const
symbol_type& yyla],
for (int yyx = yyxbegin; yyx < yyxend; ++yyx)
if (yycheck_[yyx + yyn] == yyx && yyx != yyterror_
&& !yy_table_value_is_error_ (yytable_[yyx + yyn]))
- {
+ {]])[
if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
{
yycount = 1;
diff --git a/doc/bison.texi b/doc/bison.texi
index c0b14383..9b6981d3 100644
--- a/doc/bison.texi
+++ b/doc/bison.texi
@@ -8697,7 +8697,7 @@ Enable LAC to improve syntax error handling.
@item @code{none} (default)
@item @code{full}
@end itemize
-This feature is currently only available for deterministic parsers in C.
+This feature is currently only available for deterministic parsers in C and
C++.
@end deffn
Conceptually, the LAC mechanism is straight-forward. Whenever the parser
@@ -11685,7 +11685,7 @@ location (if enabled) being @var{yylval} and
@var{yylloc}. Invocations of
Note that when using variants, the interface for @code{yylex} is the same,
but @code{yylval} is handled differently.
-Regular union-based code in Lex scanner typically look like:
+Regular union-based code in Lex scanner typically looks like:
@example
[0-9]+ @{
--
2.22.0
- [PATCH 5/6] lalr1.cc: add LAC support,
Akim Demaille <=