bug-texinfo
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Post release texi2any performance regression


From: Gavin Smith
Subject: Re: Post release texi2any performance regression
Date: Sat, 4 Nov 2023 18:53:34 +0000

On Sat, Nov 04, 2023 at 04:06:06PM +0000, Gavin Smith wrote:
> > To me the most likely reason would be that simple_parser has been
> > removed, such that calls of gdt/pgdt from perl are slower.
> 
> Thanks, I'll try to investigate this.

This is very significant for TEXINFO_XS=omit but makes no difference
for TEXINFO_XS=require, as I had suspected.

I patched in the reverse of commit 4a3d02c0fc1932350 (Patrice Dumas,
2023-10-20 15:19:17).

Before (i.e. with that commit reverted, so simple_parser is available again)

$ time TEXINFO_XS=omit ../tp/texi2any.pl ../../libc/libc.texinfo 
creature.texi:309: warning: `.' or `,' must follow @xref, not f

real    0m31.445s
user    0m30.244s
sys     0m0.415s

After (i.e. with that commit applied, so simple_parser is removed)

$ time TEXINFO_XS=omit ../tp/texi2any.pl ../../libc/libc.texinfo 
creature.texi:309: warning: `.' or `,' must follow @xref, not f

real    0m42.092s
user    0m40.629s
sys     0m0.422s

I don't think we can tolerate this kind of slowdown, even when the XS
modules are disabled, as we have no assurance that running without XS
modules is rare enough not to worry about.

If there is a real problem, then we could fix it another way, maybe
by "locking" the parser configuration at certain points, making any
changes ineffective?

I would like to commit this, except that the
"perl -w t/init_files_tests.t translation_in_parser_in_translation"
test breaks.


diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index 964f264010..e8b4ecbee4 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -572,31 +572,8 @@ sub parser(;$$)
   my $parser = dclone(\%parser_default_configuration);
   bless $parser;
 
-  $parser->{'set'} = {};
-  if (defined($conf)) {
-    foreach my $key (keys(%$conf)) {
-      if (exists($parser_settable_configuration{$key})) {
-        # we keep registrar instead of copying on purpose, to reuse the object
-        if ($key ne 'values' and $key ne 'registrar' and ref($conf->{$key})) {
-          $parser->{$key} = dclone($conf->{$key});
-        } else {
-          $parser->{$key} = $conf->{$key};
-        }
-        if ($initialization_overrides{$key}) {
-          $parser->{'set'}->{$key} = $parser->{$key};
-        }
-      } else {
-        warn "ignoring parser configuration value \"$key\"\n";
-      }
-    }
-  }
-  # restrict variables found by get_conf, and set the values to the
-  # parser initialization values only.  What is found in the document
-  # has no effect.
-  foreach my $key (keys(%Texinfo::Common::default_parser_customization_values)) {
-    $parser->{'conf'}->{$key} = $parser->{$key};
-  }
-
+  _setup_conf($parser, $conf);
+  # This is not very useful in perl, but mimics the XS parser
   print STDERR "!!!!!!!!!!!!!!!! RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!\n"
     if ($parser->{'DEBUG'});
 
@@ -611,6 +588,7 @@ sub parser(;$$)
   $parser->{'close_paragraph_commands'} = {%default_close_paragraph_commands};
   $parser->{'close_preformatted_commands'} = {%close_preformatted_commands};
 
+  # following is common with simple_parser
   # other initializations
   $parser->{'definfoenclose'} = {};
   $parser->{'source_mark_counters'} = {};
@@ -637,6 +615,65 @@ sub parser(;$$)
   return $parser;
 }
 
+# simple parser initialization.  The only difference with a regular parser
+# is that the dynamical @-commands groups and indices information references
+# that are initialized in each regular parser are initialized once for all
+# and shared among simple parsers.  It is used in gdt() and this has a sizable
+# effect on performance.
+my $simple_parser_line_commands = dclone(\%line_commands);
+my $simple_parser_brace_commands = dclone(\%brace_commands);
+my $simple_parser_valid_nestings = dclone(\%default_valid_nestings);
+my $simple_parser_no_paragraph_commands = {%default_no_paragraph_commands};
+my $simple_parser_index_names = dclone(\%index_names);
+my $simple_parser_command_index = {%command_index};
+my $simple_parser_close_paragraph_commands = {%default_close_paragraph_commands};
+my $simple_parser_close_preformatted_commands = {%close_preformatted_commands};
+sub simple_parser(;$)
+{
+  my $conf = shift;
+
+  my $parser = dclone(\%parser_default_configuration);
+  bless $parser;
+
+  _setup_conf($parser, $conf);
+  # This is not very useful in perl, but mimics the XS parser
+  print STDERR "!!!!!!!!!!!!!!!! RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!\n"
+    if ($parser->{'DEBUG'});
+
+  $parser->{'line_commands'} = $simple_parser_line_commands;
+  $parser->{'brace_commands'} = $simple_parser_brace_commands;
+  $parser->{'valid_nestings'} = $simple_parser_valid_nestings;
+  $parser->{'no_paragraph_commands'} = $simple_parser_no_paragraph_commands;
+  $parser->{'index_names'} = $simple_parser_index_names;
+  $parser->{'command_index'} = $simple_parser_command_index;
+  $parser->{'close_paragraph_commands'} = $simple_parser_close_paragraph_commands;
+  $parser->{'close_preformatted_commands'} = $simple_parser_close_preformatted_commands;
+
+  # other initializations
+  $parser->{'definfoenclose'} = {};
+  $parser->{'source_mark_counters'} = {};
+  $parser->{'nesting_context'} = {%nesting_context_init};
+  $parser->{'nesting_context'}->{'basic_inline_stack'} = [];
+  $parser->{'nesting_context'}->{'basic_inline_stack_on_line'} = [];
+  $parser->{'nesting_context'}->{'basic_inline_stack_block'} = [];
+  $parser->{'nesting_context'}->{'regions_stack'} = [];
+  $parser->{'basic_inline_commands'} = {%default_basic_inline_commands};
+
+  $parser->_init_context_stack();
+
+  # turn the array to a hash for speed.  Not sure it really matters for such
+  # a small array.
+  foreach my $expanded_format(@{$parser->{'EXPANDED_FORMATS'}}) {
+    $parser->{'expanded_formats_hash'}->{$expanded_format} = 1;
+  }
+
+  if (not defined($parser->{'registrar'})) {
+    $parser->{'registrar'} = Texinfo::Report::new();
+  }
+
+  return $parser;
+}
+
 sub get_conf($$)
 {
   my ($self, $var) = @_;
@@ -935,6 +972,32 @@ sub registered_errors($)
 
 sub _setup_conf($$)
 {
+  my ($parser, $conf) = @_;
+
+  $parser->{'set'} = {};
+  if (defined($conf)) {
+    foreach my $key (keys(%$conf)) {
+      if (exists($parser_settable_configuration{$key})) {
+        # we keep registrar instead of copying on purpose, to reuse the object
+        if ($key ne 'values' and $key ne 'registrar' and ref($conf->{$key})) {
+          $parser->{$key} = dclone($conf->{$key});
+        } else {
+          $parser->{$key} = $conf->{$key};
+        }
+        if ($initialization_overrides{$key}) {
+          $parser->{'set'}->{$key} = $parser->{$key};
+        }
+      } else {
+        warn "ignoring parser configuration value \"$key\"\n";
+      }
+    }
+  }
+  # restrict variables found by get_conf, and set the values to the
+  # parser initialization values only.  What is found in the document
+  # has no effect.
+  foreach my $key (keys(%Texinfo::Common::default_parser_customization_values)) {
+    $parser->{'conf'}->{$key} = $parser->{$key};
+  }
 }
 
 # Following are the internal parsing subroutines.  The most important are
diff --git a/tp/Texinfo/Translations.pm b/tp/Texinfo/Translations.pm
index cba465f966..be2e197f5f 100644
--- a/tp/Texinfo/Translations.pm
+++ b/tp/Texinfo/Translations.pm
@@ -373,9 +373,8 @@ sub replace_convert_substrings($$;$)
     #  }
     #}
   }
-  my $parser = Texinfo::Parser::parser($parser_conf);
-
-  if ($customization_information->get_conf('DEBUG')) {
+  my $parser = Texinfo::Parser::simple_parser($parser_conf);
+  if ($parser->{'DEBUG'}) {
     print STDERR "IN TR PARSER '$texinfo_line'\n";
   }
 
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.pm b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
index 1ced996e7a..7e3d9fe23b 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.pm
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
@@ -76,6 +76,10 @@ sub get_conf($$)
   return $self->{'conf'}->{$var};
 }
 
+sub simple_parser {
+  goto &parser;
+}
+
 # Initialize the parser
 sub parser (;$$)
 {





reply via email to

[Prev in Thread] Current Thread [Next in Thread]