texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: New XS override


From: Gavin D. Smith
Subject: branch master updated: New XS override
Date: Sat, 04 Jun 2022 11:18:56 -0400

This is an automated email from the git hooks/post-receive script.

gavin pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 781407be76 New XS override
781407be76 is described below

commit 781407be76c0a7a03655096a50cbdd2e0867a9df
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Sat Jun 4 16:18:46 2022 +0100

    New XS override
    
    * tp/Texinfo/Convert/HTML.pm (_convert_text, _entity_text):
    Split out _entity_text, used for USE_ISO.
    
    * tp/Texinfo/XS/MiscXS.xs,
    * tp/Texinfo/XS/misc.c (xs_entity_text): New function.
    * tp/Texinfo/Convert/HTML.pm: Override _entity_text
    with XS implementation.
---
 ChangeLog                  | 12 +++++++
 tp/Texinfo/Convert/HTML.pm | 25 +++++++++++----
 tp/Texinfo/XS/MiscXS.xs    | 21 ++++++++++++
 tp/Texinfo/XS/misc.c       | 79 ++++++++++++++++++++++++++++++++++++++++++++++
 tp/Texinfo/XS/miscxs.h     |  1 +
 5 files changed, 132 insertions(+), 6 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6ad032bff6..bc93a83d64 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2022-06-04  Gavin Smith  <gavinsmith0123@gmail.com>
+
+       New XS override
+
+       * tp/Texinfo/Convert/HTML.pm (_convert_text, _entity_text):
+       Split out _entity_text, used for USE_ISO.
+
+       * tp/Texinfo/XS/MiscXS.xs,
+       * tp/Texinfo/XS/misc.c (xs_entity_text): New function.
+       * tp/Texinfo/Convert/HTML.pm: Override _entity_text
+       with XS implementation.
+
 2022-06-04  Gavin Smith  <gavinsmith0123@gmail.com>
 
        Joint tree transformation
diff --git a/tp/Texinfo/Convert/HTML.pm b/tp/Texinfo/Convert/HTML.pm
index 6872e1f277..4dbf16e7a7 100644
--- a/tp/Texinfo/Convert/HTML.pm
+++ b/tp/Texinfo/Convert/HTML.pm
@@ -82,6 +82,9 @@ sub import {
     Texinfo::XSLoader::override(
       "Texinfo::Convert::HTML::_default_format_protect_text",
       "Texinfo::MiscXS::default_format_protect_text");
+    Texinfo::XSLoader::override(
+      "Texinfo::Convert::HTML::_entity_text",
+      "Texinfo::MiscXS::entity_text");
     $module_loaded = 1;
   }
   # The usual import method
@@ -5431,6 +5434,21 @@ sub _convert_definfoenclose_type($$$$) {
 $default_types_conversion{'definfoenclose_command'}
   = \&_convert_definfoenclose_type;
 
+# Note: has an XS override
+sub _entity_text
+{
+  my $text = shift;
+
+  $text =~ s/---/\&mdash\;/g;
+  $text =~ s/--/\&ndash\;/g;
+  $text =~ s/``/\&ldquo\;/g;
+  $text =~ s/''/\&rdquo\;/g;
+  $text =~ s/'/\&rsquo\;/g;
+  $text =~ s/`/\&lsquo\;/g;
+
+  return $text;
+}
+
 sub _convert_text($$$)
 {
   my $self = shift;
@@ -5464,12 +5482,7 @@ sub _convert_text($$$)
     if ($self->{'conf'}->{'USE_NUMERIC_ENTITY'}) {
       $text = $self->xml_format_text_with_numeric_entities($text);
     } elsif ($self->{'conf'}->{'USE_ISO'}) {
-      $text =~ s/---/\&mdash\;/g;
-      $text =~ s/--/\&ndash\;/g;
-      $text =~ s/``/\&ldquo\;/g;
-      $text =~ s/''/\&rdquo\;/g;
-      $text =~ s/'/\&rsquo\;/g;
-      $text =~ s/`/\&lsquo\;/g;
+      $text = _entity_text($text);
     } else {
       $text =~ s/``/&quot;/g;
       $text =~ s/''/&quot;/g;
diff --git a/tp/Texinfo/XS/MiscXS.xs b/tp/Texinfo/XS/MiscXS.xs
index 6b19907f9c..53daf92df3 100644
--- a/tp/Texinfo/XS/MiscXS.xs
+++ b/tp/Texinfo/XS/MiscXS.xs
@@ -100,6 +100,27 @@ xs_unicode_text (text_in, ...)
  OUTPUT:
      RETVAL
 
+SV *
+xs_entity_text (text_in)
+     SV *text_in
+ PREINIT:
+     char *text;
+     char *retval;
+ CODE:
+     /* Make sure the input is in UTF-8. */
+     if (!SvUTF8 (text_in))
+       sv_utf8_upgrade (text_in);
+
+     text = SvPV_nolen (text_in);
+
+     retval = xs_entity_text (text);
+
+     RETVAL = newSVpv (retval, 0);
+     SvUTF8_on (RETVAL);
+
+ OUTPUT:
+     RETVAL
+
 void
 xs_parse_texi_regex (text)
      SV *text
diff --git a/tp/Texinfo/XS/misc.c b/tp/Texinfo/XS/misc.c
index ff134edb3d..0f161f5470 100644
--- a/tp/Texinfo/XS/misc.c
+++ b/tp/Texinfo/XS/misc.c
@@ -547,6 +547,85 @@ xs_unicode_text (char *text, int in_code)
   return new;
 }
 
+char *
+xs_entity_text (char *text)
+{
+  char *p, *q;
+  static char *new;
+  int new_space, new_len;
+
+  dTHX; /* Perl boilerplate. */
+
+  p = text;
+  new_space = strlen (text);
+  new = realloc (new, new_space + 1);
+  new_len = 0;
+
+#define ADDN(s, n) \
+  if (new_len + n - 1 >= new_space - 1)           \
+    {                                             \
+      new_space += n;                             \
+      new = realloc (new, (new_space *= 2) + 1);  \
+    }                                             \
+  memcpy(new + new_len, s, n);                    \
+  new_len += n;
+
+  while (1)
+    {
+      q = p + strcspn (p, "-`'");
+      ADDN(p, q - p);
+      if (!*q)
+        break;
+      switch (*q)
+        {
+        case '-':
+          if (!memcmp (q, "---", 3))
+            {
+              p = q + 3;
+              ADDN("&mdash;", 7);
+            }
+          else if (!memcmp (q, "--", 2))
+            {
+              p = q + 2;
+              ADDN("&ndash;", 7);
+            }
+          else
+            {
+              p = q + 1;
+              ADD1(*q);
+            }
+          break;
+        case '`':
+          if (!memcmp (q, "``", 2))
+            {
+              p = q + 2;
+              ADDN("&ldquo;", 7);
+            }
+          else
+            {
+              p = q + 1;
+              ADDN("&lsquo;", 7);
+            }
+          break;
+        case '\'':
+          if (!memcmp (q, "''", 2))
+            {
+              p = q + 2;
+              ADDN("&rdquo;", 7);
+            }
+          else
+            {
+              p = q + 1;
+              ADDN("&rsquo;", 7);
+            }
+          break;
+        }
+    }
+
+  new[new_len] = '\0';
+  return new;
+}
+
 /* Return list ($at_command, $open_brace, $asterisk, $single_letter_command,
        $separator_match) */
 void xs_parse_texi_regex (SV *text_in,
diff --git a/tp/Texinfo/XS/miscxs.h b/tp/Texinfo/XS/miscxs.h
index 65b3be870a..bce4f46564 100644
--- a/tp/Texinfo/XS/miscxs.h
+++ b/tp/Texinfo/XS/miscxs.h
@@ -1,4 +1,5 @@
 char *xs_unicode_text (char *, int);
+char *xs_entity_text (char *);
 char *xs_process_text (char *text);
 HV *xs_merge_text (HV *self, HV *current, SV *text_in);
 int xs_abort_empty_line (HV *self, HV *current, SV *additional_text);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]