texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Tue, 1 Mar 2022 13:19:23 -0500 (EST)

branch: master
commit f99a28c33fb7f208853d1e2de8d18b4e3b107dac
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Mar 1 19:19:10 2022 +0100

    * tp/Texinfo/ParserNonXS.pm (parse_texi_file, _save_line_directive)
    (_next_text, _end_line): use a structure similar with source_info
    within the input for the current file informations. Like in the XS
    parser.  Add the information on the @include file name encoding.
    Use this information to decode the file name in an error message.
    
    * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_line): reindent.
---
 ChangeLog                            |  10 +++
 tp/TODO                              |  15 ++--
 tp/Texinfo/Convert/Converter.pm      |   2 +
 tp/Texinfo/ParserNonXS.pm            | 154 +++++++++++++++++++++++------------
 tp/Texinfo/XS/parsetexi/Parsetexi.pm |  28 +++----
 5 files changed, 133 insertions(+), 76 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 21e20e18be..124e73045d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2022-03-01  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/ParserNonXS.pm (parse_texi_file, _save_line_directive)
+       (_next_text, _end_line): use a structure similar with source_info
+       within the input for the current file informations. Like in the XS
+       parser.  Add the information on the @include file name encoding.
+       Use this information to decode the file name in an error message.
+
+       * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_line): reindent.
+
 2022-03-01  Patrice Dumas  <pertusus@free.fr>
 
        Rename line_nr as source_info except when a line number
diff --git a/tp/TODO b/tp/TODO
index 6256e600c5..e7662a9f1c 100644
--- a/tp/TODO
+++ b/tp/TODO
@@ -30,21 +30,20 @@ bytes.  To check that they can never be upgraded + document
  INTERNAL_LINKS
 
 * ParserNonXS.pm
- l 3226 ParserNonXS.pm
-               unshift @{$self->{'input'}}, {
-                 'input_file_name' => $file,
-  -> sets file_name in _next_text: 'file_name' => $input->{'input_file_name'}
+ file name, in ->{'file_name'}
+ Can come from parse_texi_file, CPP line or @include file, through
+ $self->{'input'}, and $source_info, ends up in the tree in source_info
+ and in error messages through line_warn line_error.
+ $source_info->{'file_name'} passed through 
 
- l 2067 decode $previous_input->{'input_file_name'}
+ l 2071 decode
+         $previous_input->{'input_file_info'}->{'file_name'}, $!));
 
  $self->{'info'}->{'input_file_name'}
  $self->{'info'}->{'input_directory'}
 
 * XS Parser seems ok
 
-in input structure 
- name -> input_file_name
-
 check with latin1
 CPP directive with encoded characters, UTF8 Texinfo manual
 ./texi2any.pl -I ./t/include/ ./t/input_files/cpp_lines.texi
diff --git a/tp/Texinfo/Convert/Converter.pm b/tp/Texinfo/Convert/Converter.pm
index 3f0f2c0c5b..37804732e5 100644
--- a/tp/Texinfo/Convert/Converter.pm
+++ b/tp/Texinfo/Convert/Converter.pm
@@ -1794,6 +1794,8 @@ C<$output_file> is an empty string. I<$document_name> is 
C<$output_filename>
 without extension.  I<$input_basefile> is based on the input texinfo file name,
 with the file name portion only (without directory).
 
+The strings returned are text strings.
+
 =item ($caption, $prepended) = $converter->float_name_caption($float)
 X<C<float_name_caption>>
 
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index a6066f261a..113d51e101 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -37,7 +37,7 @@ use Carp qw(cluck);
 use Data::Dumper;
 
 # to detect if an encoding may be used to open the files
-use Encode qw(find_encoding);
+use Encode qw(find_encoding decode);
 
 # for fileparse
 use File::Basename;
@@ -211,23 +211,23 @@ my %parser_default_configuration = (
 #                         by @-commands.  For example documentlanguage.
 
 
-# A line information is an hash reference with the keys:
+# A source information is an hash reference with the keys:
 # line_nr        the line number
 # file_name      the file name
 # macro          if in a macro expansion, the name of the macro
 #
-# A text fragment information is a 2 element array reference, the first is the
-# text fragment, the second is the line information.
+# A text fragment with source information is a 2 element array reference,
+# the first is the text, the second is the source information.
 
 # The input structure is an array, the first is the most recently included
-# file.  The last element may be a file if the parsing is done on a file,
-# with parse_texi_file, or simply pending text, if called on text.
+# file.  The last element may corresponnd to a file if the parsing is done
+# on a file, with parse_texi_file, or simply pending text, if called on text.
 # each element of the array is a hash reference.  The key are:
-# pending    an array reference containing pending text fragments, either the
-#            text given in input or macro expansion text.
-# name       file name
-# line_nr    current line number in the file
-# fh         filehandle for the file
+# pending             an array reference containing pending text fragments
+#                     with source information, either the text given in
+#                     input or macro expansion text.
+# input_file_info     source information corresponding to the current file.
+# fh                  filehandle for the file.
 
 # The commands in initialization_overrides are not set in the document if
 # set at the parser initialization.
@@ -869,8 +869,9 @@ sub _text_to_lines($)
   return $lines;
 }
 
-# construct a text fragments array matching a lines array, based on information
-# supplied.
+# construct a text fragments with source information array matching
+# a lines array, based on information supplied.
+#
 # If $FIRST_LINE is undef, no line numbers are set.
 # Otherwise, if $FIXED_LINE_NUMBER is set the line number is not
 # increased, else it is increased, beginning at $FIRST_LINE.
@@ -920,6 +921,9 @@ sub _prepare_input_from_text($$;$$$$)
 
 # entry point for text fragments.
 # Used in some tests.
+# To be in line with the XS Parser,
+#  $lines_nr should only be an integer and text a string.
+#  $file, $macro, $fixed_line_number should never be used.
 sub parse_texi_piece($$;$$$$)
 {
   my ($self, $text, $lines_nr, $file, $macro, $fixed_line_number) = @_;
@@ -937,6 +941,9 @@ sub parse_texi_piece($$;$$$$)
   return $tree;
 }
 
+# To be in line with the XS Parser,
+#  $lines_nr should only be an integer and text a string.
+#  $file, $macro, $fixed_line_number should never be used.
 sub parse_texi_line($$;$$$$)
 {
   my ($self, $text, $lines_nr, $file, $macro, $fixed_line_number) = @_;
@@ -951,6 +958,9 @@ sub parse_texi_line($$;$$$$)
   return $tree;
 }
 
+# To be in line with the XS Parser,
+#  $lines_nr should only be an integer and text a string.
+#  $file, $macro, $fixed_line_number should never be used.
 sub parse_texi_text($$;$$$$)
 {
   my ($self, $text, $lines_nr, $file, $macro, $fixed_line_number) = @_;
@@ -963,6 +973,7 @@ sub parse_texi_text($$;$$$$)
   return $self->_parse_texi_document();
 }
 
+# $FILE_NAME the name of the opened file should be a binary string.
 sub _open_in {
   my ($self, $filehandle, $file_name) = @_;
 
@@ -986,6 +997,7 @@ sub _open_in {
 }
 
 # parse a texi file
+# $INPUT_FILE_PATH is the name of the parsed file should be a binary string.
 sub parse_texi_file($$)
 {
   my ($self, $input_file_path) = @_;
@@ -1006,10 +1018,12 @@ sub parse_texi_file($$)
   
   $self->{'input'} = [{
        'pending' => [],
-       'input_file_name' => $file_name,
-       'line_nr' => 0,
+       'input_file_info' => {
+          'file_name' => $file_name,
+          'line_nr' => 0,
+       },
        'fh' => $filehandle
-        }];
+    }];
 
   return $self->_parse_texi_document();
 }
@@ -1022,7 +1036,7 @@ sub _parse_texi_document($)
      = _setup_document_root_and_before_node_section();
 
   # it should be set by the first _next_text() call, so no need
-  # to preset it to a more precise line_nr structure.
+  # to preset it to a more precise source_info structure.
   my $source_info = undef;
 
   # put the empty lines and the \input line in a container at the beginning
@@ -1086,6 +1100,7 @@ sub global_commands_information($)
 # perl_encoding
 # input_encoding_name
 # input_file_name
+# input_directory
 sub global_information($)
 {
   my $self = shift;
@@ -1287,9 +1302,9 @@ sub _begin_preformatted($$)
   return $current;
 }
 
-# wrapper around line_warn.  Set line_nr to be the line_nr of the command,
-# corresponding to the opening of the command.  Call line_warn with
-# sprintf if needed.
+# wrapper around line_warn.  Set source_info to be the source_info of
+# the command, corresponding to the opening of the command.
+# Call line_warn with sprintf if needed.
 sub _command_warn($$$$;@)
 {
   my $self = shift;
@@ -1316,7 +1331,7 @@ sub _command_error($$$$;@)
 
   # use the beginning of the @-command for the error message
   # line number if available. 
-  # FIXME line_nr currently not registered for regular brace commands
+  # FIXME source_info currently not registered for regular brace commands
   if ($current->{'source_info'}) {
     $source_info = $current->{'source_info'};
   }
@@ -2004,17 +2019,17 @@ sub _save_line_directive
 
   my $input = $self->{'input'}->[0];
   return if !$input;
-  $input->{'line_nr'} = $line_nr if $line_nr;
+  $input->{'input_file_info'}->{'line_nr'} = $line_nr if $line_nr;
   # need to convert to bytes for file name
   if (defined($file_name)) {
     my ($encoded_file_name, $file_name_encoding)
        = _encode_file_name($self, $file_name);
-    $input->{'input_file_name'} = $encoded_file_name;
+    $input->{'input_file_info'}->{'file_name'} = $encoded_file_name;
   }
 }
 
-# returns next text fragment, be it pending from a macro expansion or 
-# text or file
+# returns next text fragment with source information, be it
+# pending from a macro expansion or pending text, or read from file.
 sub _next_text($$)
 {
   my ($self, $source_info) = @_;
@@ -2022,20 +2037,22 @@ sub _next_text($$)
   while (@{$self->{'input'}}) {
     my $input = $self->{'input'}->[0];
     if (@{$input->{'pending'}}) {
-      my $new_text = shift @{$input->{'pending'}};
-      if ($new_text->[1] and $new_text->[1]->{'end_macro'}) {
-        delete $new_text->[1]->{'end_macro'};
+      my $new_text_and_info = shift @{$input->{'pending'}};
+      if ($new_text_and_info->[1] and $new_text_and_info->[1]->{'end_macro'}) {
+        delete $new_text_and_info->[1]->{'end_macro'};
         my $top_macro = shift @{$self->{'macro_stack'}};
         print STDERR "SHIFT MACRO_STACK(@{$self->{'macro_stack'}}): 
$top_macro->{'args'}->[0]->{'text'}\n"
           if ($self->{'DEBUG'});
       }
-      return ($new_text->[0], $new_text->[1]);
+      # corresponds to (line, new source_info)
+      return ($new_text_and_info->[0], $new_text_and_info->[1]);
     } elsif ($input->{'fh'}) {
       my $input_error = 0;
       local $SIG{__WARN__} = sub {
         my $message = shift;
-        print STDERR "$input->{'input_file_name'}" . ":"
-               . ($input->{'line_nr'} + 1) . ": input error: $message";
+        print STDERR "$input->{'input_file_info'}->{'file_name'}" . ":"
+               . ($input->{'input_file_info'}->{'line_nr'} + 1)
+               . ": input error: $message";
         $input_error = 1;
       };
       my $fh = $input->{'fh'};
@@ -2053,19 +2070,33 @@ sub _next_text($$)
         }
         # DEL as comment character
         $line =~ s/\x{7F}.*\s*//;
-        $input->{'line_nr'}++;
-        return ($line, {'line_nr' => $input->{'line_nr'}, 
-            'file_name' => $input->{'input_file_name'},
-            'macro' => ''});
+        $input->{'input_file_info'}->{'line_nr'}++;
+        my $new_source_info = {
+          'line_nr' => $input->{'input_file_info'}->{'line_nr'},
+          'file_name' => $input->{'input_file_info'}->{'file_name'},
+          'macro' => ''};
+        return ($line, $new_source_info);
       }
     }
     my $previous_input = shift(@{$self->{'input'}});
     # Don't close STDIN
-    if ($previous_input->{'fh'} and $previous_input->{'input_file_name'} ne 
'-') {
+    if ($previous_input->{'fh'}
+        and $previous_input->{'input_file_info'}->{'file_name'} ne '-') {
       if (!close($previous_input->{'fh'})) {
+        # need to decode for error message
+        my $file_name_encoding;
+        if (defined($previous_input->{'file_name_encoding'})) {
+          $file_name_encoding = $previous_input->{'file_name_encoding'};
+        } else {
+          $file_name_encoding = $self->get_conf('DATA_INPUT_ENCODING_NAME');
+        }
+        my $file_name = $previous_input->{'input_file_info'}->{'file_name'};
+        if (defined($file_name_encoding)) {
+          $file_name = decode($file_name_encoding, $file_name);
+        }
         $self->{'registrar'}->document_warn($self,
                              sprintf(__("error on closing %s: %s"),
-                                $previous_input->{'input_file_name'}, $!));
+                                     $file_name, $!));
       }
     }
   }
@@ -2213,7 +2244,7 @@ sub _expand_macro_body($$$$) {
           }
         } else {
           $self->_line_error(sprintf(__(
-         "\\ in \@%s expansion followed `%s' instead of parameter name or 
\\"), 
+         "\\ in \@%s expansion followed `%s' instead of parameter name or \\"),
              $macro->{'element'}->{'args'}->[0]->{'text'}, $arg), 
$source_info);
           $result .= '\\' . $arg;
         }
@@ -3239,8 +3270,10 @@ sub _end_line($$$)
               my ($directories, $suffix);
               ($file, $directories, $suffix) = fileparse($file);
               unshift @{$self->{'input'}}, { 
-                'input_file_name' => $file,
-                'line_nr' => 0,
+                'input_file_info' => {'file_name' => $file,
+                                      'line_nr' => 0,
+                                     },
+                'file_name_encoding' => $file_name_encoding,
                 'pending' => [],
                 'fh' => $filehandle };
               # TODO note that it is bytes.  No reason to have it used much
@@ -3288,7 +3321,8 @@ sub _end_line($$$)
 
             $self->{'info'}->{'input_perl_encoding'} = $perl_encoding;
             foreach my $input (@{$self->{'input'}}) {
-              binmode($input->{'fh'}, ":encoding($perl_encoding)") if 
($input->{'fh'});
+              binmode($input->{'fh'}, ":encoding($perl_encoding)")
+                if ($input->{'fh'});
             }
           }
         } elsif ($command eq 'documentlanguage') {
@@ -3305,15 +3339,14 @@ sub _end_line($$$)
         # note that the argument to expand replaced @-commands is
         # set, such that @include that are removed from the tree
         # with type set to replaced are still shown in error messages.
-        my $texi_line 
+        my $texi_line
           = 
Texinfo::Convert::Texinfo::convert_to_texinfo($current->{'args'}->[0], 1);
         $texi_line =~ s/^\s*//a;
         $texi_line =~ s/\s*$//a;
 
-        $self->_command_error($current, $source_info, 
+        $self->_command_error($current, $source_info,
                        __("bad argument to \@%s: %s"),
                        $command, $texi_line);
-        
       }
     } elsif ($command eq 'node') {
       foreach my $arg (@{$current->{'args'}}) {
@@ -3692,7 +3725,7 @@ sub _check_line_directive {
       and $source_info->{'file_name'} ne ''
       and !$source_info->{'macro'}
       and $line =~ /^\s*#\s*(line)? (\d+)(( "([^"]+)")(\s+\d+)*)?\s*$/) {
-    _save_line_directive ($self, int($2), $5);
+    _save_line_directive($self, int($2), $5);
     return 1;
   }
   return 0;
@@ -3771,8 +3804,11 @@ sub _parse_texi($$$)
 
     if ($self->{'DEBUG'}) {
       my $line_text = '';
-      $line_text = "$source_info->{'line_nr'}.$source_info->{'macro'}" if 
($source_info);
-      print STDERR "NEW LINE(".join('|', 
$self->_get_context_stack()).":@{$self->{'conditionals_stack'}}:$line_text): 
$line";
+      $line_text = "$source_info->{'line_nr'}.$source_info->{'macro'}"
+         if ($source_info);
+      print STDERR "NEW LINE("
+         .join('|', $self->_get_context_stack())
+         .":@{$self->{'conditionals_stack'}}:$line_text): $line";
       #print STDERR "CONTEXT_STACK 
".join('|',$self->_get_context_stack())."\n";
     }
 
@@ -4069,12 +4105,17 @@ sub _parse_texi($$$)
         print STDERR "UNSHIFT MACRO_STACK: 
$expanded_macro->{'args'}->[0]->{'text'}\n"
           if ($self->{'DEBUG'});
         my $new_lines = _complete_line_nr($expanded_lines, 
-                            $source_info->{'line_nr'}, 
$source_info->{'file_name'},
-                            $expanded_macro->{'args'}->[0]->{'text'}, 1);
+                         $source_info->{'line_nr'}, 
$source_info->{'file_name'},
+                         $expanded_macro->{'args'}->[0]->{'text'}, 1);
         $source_info->{'end_macro'} = 1;
+        # first put the line that was interrupted by the macro call
+        # on the input pending text with information stack
         unshift @{$self->{'input'}->[0]->{'pending'}}, [$line, $source_info];
+        # current line is the first from macro expansion
         my $new_text = shift @$new_lines;
         ($line, $source_info) = ($new_text->[0], $new_text->[1]);
+        # then put the following macro expansion lines with information on the
+        # pending text with information stack
         unshift @{$self->{'input'}->[0]->{'pending'}}, @$new_lines;
 
       # Now handle all the cases that may lead to command closing
@@ -6273,7 +6314,7 @@ of a macro.
 X<C<parse_texi_file>>
 
 The file with name I<$file_name> is considered to be a Texinfo file and
-is parsed into a tree.
+is parsed into a tree.  I<$file_name> should be a binary string.
 
 undef is returned if the file couldn't be read.
 
@@ -6328,7 +6369,12 @@ C<input_perl_encoding> string is a corresponding Perl 
encoding name.
 
 =item input_file_name
 
-The name of the main Texinfo input file.
+=item input_directory
+
+The name of the main Texinfo input file and the associated directory.
+Binary strings.  They should come from the command line and can
+be decoded with the encoding in the customization variable
+C<DATA_INPUT_ENCODING_NAME>.
 
 =back
 
@@ -6574,7 +6620,7 @@ Is associated to a macro definition element
    'contents' => [{'text' => "coucou \arg\ after arg\n", 'type' => 'raw'}],
    'extra' => {'arg_line' => " mymacro{arg}\n", }}
 
-= item merged_indices
+=item merged_indices
 
 The associated hash reference holds merged indices information, each key
 is merged in the value.  Same as setting C<@synindex> or C<syncodeindex>.
@@ -6664,10 +6710,10 @@ containers, C<@node> and sectioning commands.
 
 The parent element.
 
-=item line_nr
+=item source_info
 
 An hash reference corresponding to information on the location of the
-element in the Texinfo input manual.  It should only be available for
+element in the Texinfo input manual.  It should mainly be available for
 @-command elements, and only for @-commands that are considered to be
 complex enough that the location in the document is needed, for example
 to prepare an error message.
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.pm 
b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
index 08d1f130c4..e9512c3e5e 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.pm
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
@@ -339,25 +339,25 @@ sub parse_texi_text($$;$$$$)
 # Replacement for Texinfo::Parser::parse_texi_line
 sub parse_texi_line($$;$$$$)
 {
-    my $self = shift;
-    my $text = shift;
-    my $lines_nr = shift;
-    my $file = shift;
-    my $macro = shift;
-    my $fixed_line_number = shift;
+  my $self = shift;
+  my $text = shift;
+  my $lines_nr = shift;
+  my $file = shift;
+  my $macro = shift;
+  my $fixed_line_number = shift;
 
-    return undef if (!defined($text));
+  return undef if (!defined($text));
 
-    $lines_nr = 1 if (not defined($lines_nr));
+  $lines_nr = 1 if (not defined($lines_nr));
 
-    $self = parser() if (!defined($self));
-    utf8::upgrade($text);
-    parse_string($text, $lines_nr);
-    my $tree = build_texinfo_tree ();
+  $self = parser() if (!defined($self));
+  utf8::upgrade($text);
+  parse_string($text, $lines_nr);
+  my $tree = build_texinfo_tree ();
 
-    _set_errors_node_lists_labels_indices($self);
+  _set_errors_node_lists_labels_indices($self);
 
-    return $tree;
+  return $tree;
 }
 
 # Public interfaces of Texinfo::Parser



reply via email to

[Prev in Thread] Current Thread [Next in Thread]