texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: Decode inputs for encoded characters


From: Patrice Dumas
Subject: branch master updated: Decode inputs for encoded characters
Date: Tue, 22 Feb 2022 16:36:56 -0500

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 1837347ed7 Decode inputs for encoded characters
1837347ed7 is described below

commit 1837347ed7398115e9defc0ee439c946e87866e7
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Feb 22 22:36:38 2022 +0100

    Decode inputs for encoded characters
    
    * doc/texinfo.texi (HTML CSS), tp/Texinfo/Convert/HTML.pm
    (_process_css_file): process @charset to get the encoding, and use
    utf-8 as default as described.
    
    * tp/texi2any.pl: decode more input from command line that are
    combined with messages or strings from document.
    Import Encode symbols explicitly.
---
 ChangeLog                                          |  12 ++
 doc/texinfo.texi                                   |  14 +-
 tp/Texinfo/Common.pm                               |   1 +
 tp/Texinfo/Convert/HTML.pm                         |  23 +++
 tp/Texinfo/ParserNonXS.pm                          |   2 +-
 tp/tests/formatting/Makefile.am                    |   3 +-
 "tp/tests/formatting/c\303\252ss.css"              |   4 +
 tp/tests/formatting/list-of-tests                  |   8 +
 "tp/tests/formatting/os\303\251.texi"              |  15 ++
 .../non_ascii_command_line/Chapteur.html           |  72 +++++++++
 .../res_parser/non_ascii_command_line/index.html   |  71 +++++++++
 .../non_ascii_command_line/int\303\251rnal.txt"    |   2 +
 .../os\303\251-texinfo.texi"                       |  13 ++
 .../non_ascii_command_line/os\303\251.1"           |   0
 .../non_ascii_command_line/os\303\251.2"           |   3 +
 .../non_ascii_command_line/os\303\251_abt.html"    | 164 +++++++++++++++++++++
 "tp/tests/formatting/\303\247ss.css"               |  10 ++
 tp/tests/run_parser_all.sh                         |   8 +-
 .../formatting_non_ascii_command_line.sh           |  19 +++
 tp/texi2any.pl                                     |  39 ++---
 20 files changed, 455 insertions(+), 28 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 05fd7fea35..eed1307152 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2022-02-22  Patrice Dumas  <pertusus@free.fr>
+
+       Decode inputs for encoded characters
+
+       * doc/texinfo.texi (HTML CSS), tp/Texinfo/Convert/HTML.pm
+       (_process_css_file): process @charset to get the encoding, and use
+       utf-8 as default as described.
+
+       * tp/texi2any.pl: decode more input from command line that are
+       combined with messages or strings from document.
+       Import Encode symbols explicitly.
+
 2022-02-22  Gavin Smith  <gavinsmith0123@gmail.com>
 
        Avoid double encoding error messages with XS parser
diff --git a/doc/texinfo.texi b/doc/texinfo.texi
index 5bf77482f1..d70e7ec36f 100644
--- a/doc/texinfo.texi
+++ b/doc/texinfo.texi
@@ -18369,6 +18369,11 @@ The option @option{--css-include=@var{file}} includes 
the contents
 details are somewhat tricky, as described in the following, to provide
 maximum flexibility.
 
+@cindex @samp{@@charset} specification, in CSS files
+The CSS file first line may be a @samp{@@charset} directive.  If present,
+this directive is used to determine the encoding of the CSS file.  The
+line is not copied into the output.
+
 @cindex @samp{@@import} specifications, in CSS files
 The CSS file may begin with so-called @samp{@@import} directives,
 which link to external CSS specifications for browsers to use when
@@ -18378,12 +18383,9 @@ explain how @command{makeinfo} handles them.
 
 @cindex Comments, in CSS files
 There can be more than one @samp{@@import}, but they have to come
-first in the file, with only whitespace and comments interspersed, no
-normal definitions.  (Technical exception: a @samp{@@charset}
-directive may precede the @samp{@@import}'s.  This does not alter
-@command{makeinfo}'s behavior, it just copies the @samp{@@charset} if
-present.)  Comments in CSS files are delimited by @samp{/* ... */}, as
-in C@.  An @samp{@@import} directive must be in one of these two forms:
+first in the file, with only whitespace and comments interspersed, no normal
+definitions.  Comments in CSS files are delimited by @samp{/* ... */}, as in
+C@.  An @samp{@@import} directive must be in one of these two forms:
 
 @example
 @@import url(http://example.org/foo.css);
diff --git a/tp/Texinfo/Common.pm b/tp/Texinfo/Common.pm
index 5df9685190..d3f69efd87 100644
--- a/tp/Texinfo/Common.pm
+++ b/tp/Texinfo/Common.pm
@@ -28,6 +28,7 @@ use 5.006;
 # to determine the null file
 use Config;
 use File::Spec;
+# for find_encoding, resolve_alias and maybe utf8 related functions
 use Encode;
 
 use Texinfo::Documentlanguages;
diff --git a/tp/Texinfo/Convert/HTML.pm b/tp/Texinfo/Convert/HTML.pm
index e331a51e28..f29f3f832e 100644
--- a/tp/Texinfo/Convert/HTML.pm
+++ b/tp/Texinfo/Convert/HTML.pm
@@ -48,6 +48,8 @@ use File::Copy qw(copy);
 
 use Storable;
 
+use Encode qw(find_encoding);
+
 use Texinfo::Common;
 use Texinfo::Config;
 use Texinfo::Convert::Unicode;
@@ -7089,6 +7091,27 @@ sub _process_css_file($$$)
   my $line_nr = 0;
   while (my $line = <$fh>) {
     $line_nr++;
+    if ($line_nr == 1) {
+      # the rule is to assume utf-8.  There could also be a BOM, and
+      # the Content-Type: HTTP header but it is not relevant here.
+      # https://developer.mozilla.org/en-US/docs/Web/CSS/@charset
+      my $charset = 'utf-8';
+      my $charset_line;
+      if ($line =~ /^\@charset  *"([^"]+)" *; *$/) {
+        $charset = $1;
+        $charset_line = 1;
+      }
+      my $Encode_encoding_object = find_encoding($charset);
+      if (defined($Encode_encoding_object)) {
+        my $input_perl_encoding = $Encode_encoding_object->name();
+        if ($input_perl_encoding eq 'utf-8') {
+          binmode($fh, ":utf8");
+        } else {
+          binmode($fh, ":encoding($input_perl_encoding)");
+        }
+      }
+      next if ($charset_line);
+    }
     #print STDERR "Line: $line";
     if ($in_rules) {
       push @$rules, $line;
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index 0b8a3c1e0c..14fbc5cf14 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -37,7 +37,7 @@ use Carp qw(cluck);
 use Data::Dumper;
 
 # to detect if an encoding may be used to open the files
-use Encode;
+use Encode qw(find_encoding);
 
 # for fileparse
 use File::Basename;
diff --git a/tp/tests/formatting/Makefile.am b/tp/tests/formatting/Makefile.am
index 9c23f259db..ff32fec9d7 100644
--- a/tp/tests/formatting/Makefile.am
+++ b/tp/tests/formatting/Makefile.am
@@ -3,7 +3,8 @@ EXTRA_DIST = \
  ignore_and_comments.texi   split_nocopying.texi \
  inc_file.texi              test_need.texi \
  lightweight_markups.texi   japanese_long_name.texi  \
- file.css list-of-tests  res_parser
+ osé.texi \
+ çss.css cêss.css file.css list-of-tests  res_parser
 
 DISTCLEANFILES = tests.log tests.out
 
diff --git "a/tp/tests/formatting/c\303\252ss.css" 
"b/tp/tests/formatting/c\303\252ss.css"
new file mode 100644
index 0000000000..e50300e009
--- /dev/null
+++ "b/tp/tests/formatting/c\303\252ss.css"
@@ -0,0 +1,4 @@
+@charset "iso-8859-15";
+
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
diff --git a/tp/tests/formatting/list-of-tests 
b/tp/tests/formatting/list-of-tests
index 1812a91573..42b635c039 100644
--- a/tp/tests/formatting/list-of-tests
+++ b/tp/tests/formatting/list-of-tests
@@ -9,3 +9,11 @@ simplest_test_css simplest.texi --css-include file.css
 
 # check that command line overrides document
 documentlanguage_cmdline documentlanguage.texi --document-language=fr
+
+# some command-line arguments when incorrect cause texi2any to die.
+# easily tested by calling directly ./texi2any.pl and checking visually:
+# ./texi2any.pl --footnote-style=bâd
+# ./texi2any.pl --paragraph-indent=ïndent
+# check non ascii command line arguments
+non_ascii_command_line osé.texi --html --split=Mekanïk 
--document-language=Destruktïw -c 'Kommandöh vâl' -D TÛT -D 'vùr ké' -U ôndef 
-c 'FORMAT_MENU mînù' --macro-expand=@OUT_DIR@osé-texinfo.texi 
--internal-links=@OUT_DIR@intérnal.txt --css-include çss.css --css-include 
cêss.css --css-ref=rëf --css-ref=öref
+
diff --git "a/tp/tests/formatting/os\303\251.texi" 
"b/tp/tests/formatting/os\303\251.texi"
new file mode 100644
index 0000000000..c273904f72
--- /dev/null
+++ "b/tp/tests/formatting/os\303\251.texi"
@@ -0,0 +1,15 @@
+\input texinfo.tex
+
+@setfilename osé.info
+
+@node Top
+@top Tôp
+
+@node Chaptêur
+@chapter Chapteùr
+
+@ifset TÛT
+isset TÛT
+@end ifset
+
+value vùr @value{vùr}.
diff --git 
a/tp/tests/formatting/res_parser/non_ascii_command_line/Chapteur.html 
b/tp/tests/formatting/res_parser/non_ascii_command_line/Chapteur.html
new file mode 100644
index 0000000000..78818eed59
--- /dev/null
+++ b/tp/tests/formatting/res_parser/non_ascii_command_line/Chapteur.html
@@ -0,0 +1,72 @@
+<!DOCTYPE html>
+<html>
+<!-- Created by texinfo, http://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>1 Chapteùr (Tôp)</title>
+
+<meta name="description" content="1 Chapteùr (Tôp)">
+<meta name="keywords" content="1 Chapteùr (Tôp)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="texi2any">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<style type="text/css">
+<!--
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import 
+*/
+
+@import ("strânge\" ;file") ;
+
+
+span.program-in-footer {font-size: smaller}
+@media tv { h3 {text-align: left} }
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
+
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="rëf">
+<link rel="stylesheet" type="text/css" href="öref">
+
+
+</head>
+
+<body lang="Destruktïw">
+<div class="chapter-level-extent" id="Chapt_00eaur">
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[<a href="index.html" title="Beginning of 
this chapter or previous chapter"> &lt;&lt; </a>]</td>
+<td valign="middle" align="left">[<a href="index.html" title="Previous section 
in reading order"> &lt; </a>]</td>
+<td valign="middle" align="left">[<a href="index.html" title="Up section"> Up 
</a>]</td>
+<td valign="middle" align="left">[ &gt; ]</td>
+<td valign="middle" align="left">[ &gt;&gt; ]</td>
+<td valign="middle" align="left"> &nbsp; </td>
+<td valign="middle" align="left"> &nbsp; </td>
+<td valign="middle" align="left"> &nbsp; </td>
+<td valign="middle" align="left"> &nbsp; </td>
+<td valign="middle" align="left">[<a href="index.html" title="Cover (top) of 
document">Top</a>]</td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="osé_abt.html#SEC_About" 
title="About (help)"> ? </a>]</td>
+</tr></table>
+<hr>
+<h1 class="chapter" id="Chapteur">1 Chapteùr</h1>
+
+<p>isset TÛT
+</p>
+<p>value vùr ké.
+</p></div>
+<hr>
+<p>
+  <span class="program-in-footer">This document was generated on <em 
class="emph">a sunny day</em> using <a class="uref" 
href="http://www.gnu.org/software/texinfo/"><em 
class="emph">texi2any</em></a>.</span>
+</p>
+
+
+</body>
+</html>
diff --git a/tp/tests/formatting/res_parser/non_ascii_command_line/index.html 
b/tp/tests/formatting/res_parser/non_ascii_command_line/index.html
new file mode 100644
index 0000000000..d6a7955d49
--- /dev/null
+++ b/tp/tests/formatting/res_parser/non_ascii_command_line/index.html
@@ -0,0 +1,71 @@
+<!DOCTYPE html>
+<html>
+<!-- Created by texinfo, http://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Tôp</title>
+
+<meta name="description" content="Tôp">
+<meta name="keywords" content="Tôp">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="texi2any">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<style type="text/css">
+<!--
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import 
+*/
+
+@import ("strânge\" ;file") ;
+
+
+span.program-in-footer {font-size: smaller}
+@media tv { h3 {text-align: left} }
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
+
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="rëf">
+<link rel="stylesheet" type="text/css" href="öref">
+
+
+</head>
+
+<body lang="Destruktïw">
+
+<div class="top-level-extent" id="Top">
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[ &lt; ]</td>
+<td valign="middle" align="left">[<a href="Chapteur.html" title="Next section 
in reading order"> &gt; </a>]</td>
+<td valign="middle" align="left"> &nbsp; </td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="osé_abt.html#SEC_About" 
title="About (help)"> ? </a>]</td>
+</tr></table>
+<hr>
+<h1 class="top" id="Top-1">Tôp</h1>
+
+</div>
+<hr>
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[ &lt; ]</td>
+<td valign="middle" align="left">[<a href="Chapteur.html" title="Next section 
in reading order"> &gt; </a>]</td>
+<td valign="middle" align="left"> &nbsp; </td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="osé_abt.html#SEC_About" 
title="About (help)"> ? </a>]</td>
+</tr></table>
+<p>
+  <span class="program-in-footer">This document was generated on <em 
class="emph">a sunny day</em> using <a class="uref" 
href="http://www.gnu.org/software/texinfo/"><em 
class="emph">texi2any</em></a>.</span>
+</p>
+
+
+</body>
+</html>
diff --git 
"a/tp/tests/formatting/res_parser/non_ascii_command_line/int\303\251rnal.txt" 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/int\303\251rnal.txt"
new file mode 100644
index 0000000000..9b0c060cc5
--- /dev/null
+++ 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/int\303\251rnal.txt"
@@ -0,0 +1,2 @@
+index.html     toc     Tôp
+Chapteur.html  toc     1 Chapteùr
diff --git 
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251-texinfo.texi"
 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251-texinfo.texi"
new file mode 100644
index 0000000000..587bad7166
--- /dev/null
+++ 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251-texinfo.texi"
@@ -0,0 +1,13 @@
+\input texinfo.tex
+
+@setfilename osé.info
+
+@node Top
+@top Tôp
+
+@node Chaptêur
+@chapter Chapteùr
+
+isset TÛT
+
+value vùr ké.
diff --git 
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.1" 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.1"
new file mode 100644
index 0000000000..e69de29bb2
diff --git 
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.2" 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.2"
new file mode 100644
index 0000000000..3d62935143
--- /dev/null
+++ "b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251.2"
@@ -0,0 +1,3 @@
+texi2any: warning: Mekanïk is not a valid split possibility
+texi2any: warning: Destruktïw is not a valid language code
+texi2any: warning: unknown variable from command line: Kommandöh
diff --git 
"a/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251_abt.html" 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251_abt.html"
new file mode 100644
index 0000000000..8c1656506c
--- /dev/null
+++ 
"b/tp/tests/formatting/res_parser/non_ascii_command_line/os\303\251_abt.html"
@@ -0,0 +1,164 @@
+<!DOCTYPE html>
+<html>
+<!-- Created by texinfo, http://www.gnu.org/software/texinfo/ -->
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>About This Document (Tôp)</title>
+
+<meta name="description" content="About This Document (Tôp)">
+<meta name="keywords" content="About This Document (Tôp)">
+<meta name="resource-type" content="document">
+<meta name="distribution" content="global">
+<meta name="Generator" content="texi2any">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+
+<style type="text/css">
+<!--
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import 
+*/
+
+@import ("strânge\" ;file") ;
+
+
+span.program-in-footer {font-size: smaller}
+@media tv { h3 {text-align: left} }
+ul.mark-euro {list-style-type: "€"}
+ul.mark-néni {list-style-type: "vàça"}
+
+-->
+</style>
+<link rel="stylesheet" type="text/css" href="rëf">
+<link rel="stylesheet" type="text/css" href="öref">
+
+
+</head>
+
+<body lang="Destruktïw">
+<div class="element-about" id="SEC_About">
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[<a href="index.html" title="Cover (top) 
of document">Top</a>]</td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="#SEC_About" title="About (help)"> ? 
</a>]</td>
+</tr></table>
+<hr>
+<h1 class="about-heading">About This Document</h1>
+
+<p>
+  This document was generated on <em class="emph">a sunny day</em> using <a 
class="uref" href="http://www.gnu.org/software/texinfo/"><em 
class="emph">texi2any</em></a>.
+</p>
+<p>
+  The buttons in the navigation panels have the following meaning:
+</p>
+<table border="1">
+  <tr>
+    <th> Button </th>
+    <th> Name </th>
+    <th> Go to </th>
+    <th> From 1.2.3 go to</th>
+  </tr>
+  <tr>
+    <td align="center"> [ &lt;&lt; ] </td>
+    <td align="center">FastBack</td>
+    <td>Beginning of this chapter or previous chapter</td>
+    <td>1</td>
+  </tr>
+  <tr>
+    <td align="center"> [ &lt; ] </td>
+    <td align="center">Back</td>
+    <td>Previous section in reading order</td>
+    <td>1.2.2</td>
+  </tr>
+  <tr>
+    <td align="center"> [ Up ] </td>
+    <td align="center">Up</td>
+    <td>Up section</td>
+    <td>1.2</td>
+  </tr>
+  <tr>
+    <td align="center"> [ &gt; ] </td>
+    <td align="center">Forward</td>
+    <td>Next section in reading order</td>
+    <td>1.2.4</td>
+  </tr>
+  <tr>
+    <td align="center"> [ &gt;&gt; ] </td>
+    <td align="center">FastForward</td>
+    <td>Next chapter</td>
+    <td>2</td>
+  </tr>
+  <tr>
+    <td align="center"> [Top] </td>
+    <td align="center">Top</td>
+    <td>Cover (top) of document</td>
+    <td> &nbsp; </td>
+  </tr>
+  <tr>
+    <td align="center"> [Contents] </td>
+    <td align="center">Contents</td>
+    <td>Table of contents</td>
+    <td> &nbsp; </td>
+  </tr>
+  <tr>
+    <td align="center"> [Index] </td>
+    <td align="center">Index</td>
+    <td>Index</td>
+    <td> &nbsp; </td>
+  </tr>
+  <tr>
+    <td align="center"> [ ? ] </td>
+    <td align="center">About</td>
+    <td>About (help)</td>
+    <td> &nbsp; </td>
+  </tr>
+</table>
+
+<p>
+  where the <strong class="strong"> Example </strong> assumes that the current 
position is at <strong class="strong"> Subsubsection One-Two-Three </strong> of 
a document of the following structure:
+</p>
+
+<ul>
+  <li> 1. Section One
+    <ul>
+      <li>1.1 Subsection One-One
+        <ul>
+          <li>...</li>
+        </ul>
+      </li>
+      <li>1.2 Subsection One-Two
+        <ul>
+          <li>1.2.1 Subsubsection One-Two-One</li>
+          <li>1.2.2 Subsubsection One-Two-Two</li>
+          <li>1.2.3 Subsubsection One-Two-Three &nbsp; &nbsp;
+            <strong>&lt;== Current Position </strong></li>
+          <li>1.2.4 Subsubsection One-Two-Four</li>
+        </ul>
+      </li>
+      <li>1.3 Subsection One-Three
+        <ul>
+          <li>...</li>
+        </ul>
+      </li>
+      <li>1.4 Subsection One-Four</li>
+    </ul>
+  </li>
+</ul>
+</div><hr>
+<table class="nav-panel" cellpadding="1" cellspacing="1" border="0">
+<tr><td valign="middle" align="left">[<a href="index.html" title="Cover (top) 
of document">Top</a>]</td>
+<td valign="middle" align="left">[Contents]</td>
+<td valign="middle" align="left">[Index]</td>
+<td valign="middle" align="left">[<a href="#SEC_About" title="About (help)"> ? 
</a>]</td>
+</tr></table>
+<p>
+  <span class="program-in-footer">This document was generated on <em 
class="emph">a sunny day</em> using <a class="uref" 
href="http://www.gnu.org/software/texinfo/"><em 
class="emph">texi2any</em></a>.</span>
+</p>
+
+
+</body>
+</html>
diff --git "a/tp/tests/formatting/\303\247ss.css" 
"b/tp/tests/formatting/\303\247ss.css"
new file mode 100644
index 0000000000..bf9d7c42c5
--- /dev/null
+++ "b/tp/tests/formatting/\303\247ss.css"
@@ -0,0 +1,10 @@
+/* a comment */
+@import "éfile2.css" tv
+;
+
+/* another comment
+@import 
+*/
+
+@import ("strânge\" ;file") ;
+@media tv { h3 {text-align: left} }
diff --git a/tp/tests/run_parser_all.sh b/tp/tests/run_parser_all.sh
index 5c475d4c46..932dcc6d7b 100755
--- a/tp/tests/run_parser_all.sh
+++ b/tp/tests/run_parser_all.sh
@@ -116,7 +116,7 @@ post_process_output ()
   fi
 }
 
-LC_ALL=C; export LC_ALL
+LC_ALL=C.UTF-8; export LC_ALL
 
 prepended_command=
 #prepended_command=time
@@ -228,7 +228,8 @@ if [ "z$clean" = 'zyes' -o "z$copy" = 'zyes' ]; then
 # there are better ways
     dir=`echo $line | awk '{print $1}'`
     file=`echo $line | awk '{print $2}'`
-    remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]*  *[a-zA-Z0-9_./-]* *//'`
+    #remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]*  *[a-zA-Z0-9_./-]* *//'`
+    remaining=`echo $line | sed 's/[a-zA-Z0-9_é./-]*  *[a-zA-Z0-9_é./-]* *//'`
     [ "z$dir" = 'z' -o "z$file" = 'z' ] && continue
     if [ "z$clean" = 'zyes' ]; then
       for command_dir in $commands; do
@@ -284,7 +285,8 @@ while read line; do
   fi
 
   basename=`basename $file .texi`
-  remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]*  *[a-zA-Z0-9_./-]* *//'`
+  #remaining=`echo $line | sed 's/[a-zA-Z0-9_./-]*  *[a-zA-Z0-9_./-]* *//'`
+  remaining=`echo $line | sed 's/[a-zA-Z0-9_é./-]*  *[a-zA-Z0-9_é./-]* *//'`
   src_file="$srcdir/$testdir/$file"
   
   for command_dir in $commands; do
diff --git a/tp/tests/test_scripts/formatting_non_ascii_command_line.sh 
b/tp/tests/test_scripts/formatting_non_ascii_command_line.sh
new file mode 100755
index 0000000000..c83a211a26
--- /dev/null
+++ b/tp/tests/test_scripts/formatting_non_ascii_command_line.sh
@@ -0,0 +1,19 @@
+#! /bin/sh
+# This file generated by maintain/regenerate_cmd_tests.sh
+
+if test z"$srcdir" = "z"; then
+  srcdir=.
+fi
+
+one_test_logs_dir=test_log
+
+
+dir=formatting
+name='non_ascii_command_line'
+mkdir -p $dir
+
+"$srcdir"/run_parser_all.sh -dir $dir $name
+exit_status=$?
+cat $dir/$one_test_logs_dir/$name.log
+exit $exit_status
+
diff --git a/tp/texi2any.pl b/tp/texi2any.pl
index 67a7dca142..8be0945de9 100755
--- a/tp/texi2any.pl
+++ b/tp/texi2any.pl
@@ -28,7 +28,7 @@ use strict;
 # to determine the locale encoding
 use I18N::Langinfo qw(langinfo CODESET);
 # to decode command line arguments
-use Encode;
+use Encode qw(decode encode find_encoding);
 # for file names portability
 use File::Spec;
 # to determine the path separator and null file
@@ -345,7 +345,7 @@ sub _decode_i18n_string($$)
 {
   my $string = shift;
   my $encoding = shift;
-  return Encode::decode($encoding, $string);
+  return decode($encoding, $string);
 }
 
 sub _encode_message($)
@@ -353,7 +353,7 @@ sub _encode_message($)
   my $text = shift;
   my $encoding = get_conf('MESSAGE_OUTPUT_ENCODING_NAME');
   if (defined($encoding)) {
-    return Encode::encode($encoding, $text);
+    return encode($encoding, $text);
   } else {
     return $text;
   }
@@ -682,7 +682,7 @@ sub _decode_input($)
 
   my $encoding = get_conf('DATA_INPUT_ENCODING_NAME');
   if (defined($encoding)) {
-    return Encode::decode($encoding, $text);
+    return decode($encoding, $text);
   } else {
     return $text;
   }
@@ -879,17 +879,18 @@ There is NO WARRANTY, to the extent permitted by 
law.\n"), "2021");
  'number-footnotes!' => sub { set_from_cmdline('NUMBER_FOOTNOTES', $_[1]); },
  'node-files!' => sub { set_from_cmdline('NODE_FILES', $_[1]); },
  'footnote-style=s' => sub {
-    if ($_[1] eq 'end' or $_[1] eq 'separate') {
-       set_from_cmdline('footnotestyle', $_[1]);
+    my $value = _decode_input($_[1]);
+    if ($value eq 'end' or $value eq 'separate') {
+       set_from_cmdline('footnotestyle', $value);
     } else {
-      # FIXME decode/encode?
-      die sprintf(__("%s: --footnote-style arg must be `separate' or `end', 
not `%s'.\n"),
-                  $real_command_name, $_[1]);
+      die _encode_message(
+           sprintf(__("%s: --footnote-style arg must be `separate' or `end', 
not `%s'.\n"),
+                  $real_command_name, $value));
     }
   },
- 'split=s' => sub {  my $split = $_[1];
+ 'split=s' => sub {  my $split = _decode_input($_[1]);
                      my @messages 
-                       = Texinfo::Common::warn_unknown_split($_[1]);
+                       = Texinfo::Common::warn_unknown_split($split);
                      if (@messages) {
                        foreach my $message (@messages) {
                          document_warn($message);
@@ -911,12 +912,10 @@ There is NO WARRANTY, to the extent permitted by 
law.\n"), "2021");
                      $format = 'plaintext' if (!$_[1] and $format eq 'info'); 
},
  'output|out|o=s' => sub {
     my $var = 'OUTFILE';
-    # do not decode before calling -d as -d expects bytes
     if ($_[1] =~ m:/$: or -d $_[1]) {
       set_from_cmdline($var, undef);
       $var = 'SUBDIR';
     }
-    #set_from_cmdline($var, _decode_input($_[1]));
     set_from_cmdline($var, $_[1]);
     push @texi2dvi_args, '-o', $_[1];
   },
@@ -972,12 +971,13 @@ There is NO WARRANTY, to the extent permitted by 
law.\n"), "2021");
  'error-limit|e=i' => sub { set_from_cmdline('ERROR_LIMIT', $_[1]); },
  'split-size=s' => sub {set_from_cmdline('SPLIT_SIZE', $_[1])},
  'paragraph-indent|p=s' => sub {
-    my $value = $_[1];
+    my $value = _decode_input($_[1]);
     if ($value =~ /^([0-9]+)$/ or $value eq 'none' or $value eq 'asis') {
-      set_from_cmdline('paragraphindent', $_[1]);
+      set_from_cmdline('paragraphindent', $value);
     } else {
-      die sprintf(__("%s: --paragraph-indent arg must be 
numeric/`none'/`asis', not `%s'.\n"), 
-                  $real_command_name, $value);
+      die _encode_message(sprintf(
+       __("%s: --paragraph-indent arg must be numeric/`none'/`asis', not 
`%s'.\n"),
+                  $real_command_name, $value));
     }
  },
  'fill-column|f=i' => sub {set_from_cmdline('FILLCOLUMN',$_[1]);},
@@ -1010,6 +1010,11 @@ There is NO WARRANTY, to the extent permitted by 
law.\n"), "2021");
 
 exit 1 if (!$result_options);
 
+# those are strings combined with output so decode
+my $ref_css_refs = get_conf('CSS_REFS');
+my @input_css_refs = @{$ref_css_refs};
+@$ref_css_refs = map {_decode_input($_)} @input_css_refs;
+
 # Change some options depending on the settings of other ones set formats
 sub process_config {
   my $conf = shift;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]