texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/Encoding.pm (encoding_alias): use En


From: Patrice Dumas
Subject: branch master updated: * tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding, both for the perl name, and, with mime_name() lower cased for the output/HTML/input encoding name. Remove %encoding_aliases and %perl_charset_to_html.
Date: Fri, 18 Feb 2022 16:51:13 -0500

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 0986f8eeab * tp/Texinfo/Encoding.pm (encoding_alias): use 
Encode::find_encoding, both for the perl name, and, with mime_name() lower 
cased for the output/HTML/input encoding name.  Remove %encoding_aliases and 
%perl_charset_to_html.
0986f8eeab is described below

commit 0986f8eeab7fe4cd0cf24ef596987d78c5458645
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Feb 18 22:51:04 2022 +0100

    * tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding,
    both for the perl name, and, with mime_name() lower cased for the
    output/HTML/input encoding name.  Remove %encoding_aliases and
    %perl_charset_to_html.
---
 ChangeLog                          |  7 +++++++
 tp/Texinfo/Encoding.pm             | 32 +++++++-------------------------
 tp/Texinfo/XS/parsetexi/end_line.c | 12 +++++++-----
 3 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b10e737064..77ab5557b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2022-02-18  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/Encoding.pm (encoding_alias): use Encode::find_encoding,
+       both for the perl name, and, with mime_name() lower cased for the
+       output/HTML/input encoding name.  Remove %encoding_aliases and
+       %perl_charset_to_html.
+
 2022-02-18  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Convert/Unicode.pm (%unicode_to_eight_bit)
diff --git a/tp/Texinfo/Encoding.pm b/tp/Texinfo/Encoding.pm
index f70e4da229..6195737c3d 100644
--- a/tp/Texinfo/Encoding.pm
+++ b/tp/Texinfo/Encoding.pm
@@ -22,7 +22,7 @@ package Texinfo::Encoding;
 
 use strict;
 
-use Encode;
+use Encode qw(find_encoding);
 
 require Exporter;
 use vars qw(@ISA @EXPORT_OK);
@@ -30,40 +30,22 @@ use vars qw(@ISA @EXPORT_OK);
 
 @EXPORT_OK = qw( encoding_alias );
 
-
-# charset related definitions.
-
-my %perl_charset_to_html = (
-              'utf8'       => 'utf-8',
-              'utf-8-strict'       => 'utf-8',
-              'ascii'      => 'us-ascii',
-              'shiftjis'      => 'shift_jis',
-);
-
-# encoding name normalization to html-compatible encoding names
-my %encoding_aliases;
-
-foreach my $perl_charset (keys(%perl_charset_to_html)) {
-   $encoding_aliases{$perl_charset} = $perl_charset_to_html{$perl_charset};
-   $encoding_aliases{$perl_charset_to_html{$perl_charset}}
-        = $perl_charset_to_html{$perl_charset};
-}
-
 my %canonical_texinfo_encodings;
 # These are the encodings from the texinfo manual
 foreach my $canonical_encoding ('us-ascii', 'utf-8', 'iso-8859-1',
                   'iso-8859-15', 'iso-8859-2', 'koi8-r', 'koi8-u') {
   $canonical_texinfo_encodings{$canonical_encoding} = 1;
-  $encoding_aliases{$canonical_encoding} = $canonical_encoding;
 }
 
 sub encoding_alias($)
 {
   my $encoding = shift;
-  my $perl_encoding = Encode::resolve_alias($encoding);
-  my $canonical_output_encoding;
-  if ($perl_encoding) {
-    $canonical_output_encoding = $encoding_aliases{$perl_encoding};
+  my $enc = find_encoding($encoding);
+  my ($perl_encoding, $canonical_output_encoding);
+  if (defined($enc)) {
+    $perl_encoding = $enc->name();
+    # mime_name() is upper-case, our keys are lower case, set to lower case
+    $canonical_output_encoding = lc($enc->mime_name());
   }
   my $canonical_texinfo_encoding;
   foreach my $possible_encoding ($encoding, $canonical_output_encoding,
diff --git a/tp/Texinfo/XS/parsetexi/end_line.c b/tp/Texinfo/XS/parsetexi/end_line.c
index 625d51dc99..cd05a004c1 100644
--- a/tp/Texinfo/XS/parsetexi/end_line.c
+++ b/tp/Texinfo/XS/parsetexi/end_line.c
@@ -1523,9 +1523,10 @@ end_line_misc_line (ELEMENT *current)
                   struct encoding_map {
                       char *from; char *to;
                   };
-                  /* The map mimics Encode::resolve_alias() result.  Even when
-                     the alias is not good, such as 'utf-8-strict' for 'utf-8'
-                     use the same mapping for consistency with the perl Parser */
+                  /* The map mimics Encode::find_encoding()->name() result.
+                     Even when the alias is not good, such as 'utf-8-strict'
+                     for 'utf-8', use the same mapping for consistency with the
+                     perl Parser */
                   static struct encoding_map map[] = {
                       "utf-8", "utf-8-strict",
                       "us-ascii", "ascii",
@@ -1550,7 +1551,7 @@ end_line_misc_line (ELEMENT *current)
                 {
                   command_warn (current, "unrecognized encoding name `%s'",
                                 text);
-                  /* Texinfo::Encoding calls Encode::resolve_alias, so knows
+                  /* Texinfo::Encoding calls Encode::find_encoding, so knows
                      about more encodings than what we know about here.
                      TODO: Check when perl_encoding could be defined when 
                      texinfo_encoding isn't.
@@ -1559,7 +1560,8 @@ end_line_misc_line (ELEMENT *current)
 
                 }
 
-              /* Set input_encoding from perl_encoding */
+              /* Set input_encoding from perl_encoding.  In the perl parser,
+                 lc(Encode::find_encoding()->mime_name()) is used */
               input_encoding = 0;
               if (perl_encoding)
                 {



reply via email to

[Prev in Thread] Current Thread [Next in Thread]