texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[7600] xs_parse_texi_regex


From: gavinsmith0123
Subject: [7600] xs_parse_texi_regex
Date: Sun, 1 Jan 2017 20:56:24 +0000 (UTC)

Revision: 7600
          http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7600
Author:   gavin
Date:     2017-01-01 20:56:23 +0000 (Sun, 01 Jan 2017)
Log Message:
-----------
xs_parse_texi_regex

Modified Paths:
--------------
    trunk/ChangeLog
    trunk/tp/Texinfo/MiscXS/MiscXS.xs
    trunk/tp/Texinfo/MiscXS/miscxs.c
    trunk/tp/Texinfo/MiscXS/miscxs.h
    trunk/tp/Texinfo/Parser.pm

Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog     2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/ChangeLog     2017-01-01 20:56:23 UTC (rev 7600)
@@ -1,5 +1,15 @@
 2017-01-01  Gavin Smith  <address@hidden>
 
+       * tp/Texinfo/Parser.pm (_parse_texi, _parse_texi_regex): Split 
+       out several regexes used at the start of the remaining part of 
+       the line.
+       * tp/Texinfo/MiscXS/MiscXS.xs,
+       * tp/Texinfo/MiscXS/miscxs.c (xs_parse_texi_regex): New function.
+       * tp/Texinfo/Parser.pm: Add a 'UNITCHECK' section to override 
+       '_parse_texi_regex'.
+
+2017-01-01  Gavin Smith  <address@hidden>
+
        * tp/t/automatic_nodes.t: Use the "Texinfo::ModulePath" module.
 
 2017-01-01  Gavin Smith  <address@hidden>

Modified: trunk/tp/Texinfo/MiscXS/MiscXS.xs
===================================================================
--- trunk/tp/Texinfo/MiscXS/MiscXS.xs   2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/MiscXS/MiscXS.xs   2017-01-01 20:56:23 UTC (rev 7600)
@@ -99,3 +99,36 @@
 
  OUTPUT:
      RETVAL
+
+void
+xs_parse_texi_regex (text)
+     SV *text
+  PREINIT:
+     char *at_command;
+     char *open_brace;
+     char *asterisk;
+     char *single_letter_command;
+     char *separator_match;
+     char *new_text;
+  PPCODE:
+     xs_parse_texi_regex(text, &at_command, &open_brace, &asterisk, 
+                         &single_letter_command, &separator_match, &new_text);
+     EXTEND(SP,6);
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(0), at_command);
+     SvUTF8_on(ST(0));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(1), open_brace);
+     SvUTF8_on(ST(1));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(2), asterisk);
+     SvUTF8_on(ST(2));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(3), single_letter_command);
+     SvUTF8_on(ST(3));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(4), separator_match);
+     SvUTF8_on(ST(4));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(5), new_text);
+     SvUTF8_on(ST(5));

Modified: trunk/tp/Texinfo/MiscXS/miscxs.c
===================================================================
--- trunk/tp/Texinfo/MiscXS/miscxs.c    2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/MiscXS/miscxs.c    2017-01-01 20:56:23 UTC (rev 7600)
@@ -553,3 +553,80 @@
   new[new_len] = '\0';
   return new;
 }
+
+/* Return list ($at_command, $open_brace, $asterisk, $single_letter_command,
+       $separator_match) */
+void xs_parse_texi_regex (SV *text_in,
+                          char **at_command,
+                          char **open_brace,
+                          char **asterisk,
+                          char **single_letter_command,
+                          char **separator_match,
+                          char **new_text)
+{
+  char *text;
+
+  dTHX;
+
+  /* Make sure the input is in UTF8. */
+  if (!SvUTF8 (text_in))
+    sv_utf8_upgrade (text_in);
+  text = SvPV_nolen (text_in);
+
+  *at_command = *open_brace = *asterisk = *single_letter_command
+          = *separator_match = *new_text = 0;
+
+  if (*text == '@' && isalnum(text[1]))
+    {
+      char *p, *q;
+
+      p = text + 1;
+      q = text + 2;
+      while (isalnum (*q) || *q == '-' || *q == '_')
+        q++;
+      *at_command = strndup (p, q - p);
+    }
+  else
+    {
+      if (*text == '{')
+        {
+          *open_brace = strdup ("{");
+          *separator_match = strdup ("{");
+        }
+
+      else if (*text == '@'
+                 && text[1] && strchr ("([\"'~@}{,.!?"
+                                       " \f\n\r\t"
+                                       "*-^`=:|/\\",
+                                       text[1]))
+        {
+          *single_letter_command = malloc (2);
+          (*single_letter_command)[0] = text[1];
+          (*single_letter_command)[1] = '\0';
+        }
+
+      else if (strchr ("{}@,:\t.\f", *text))
+        {
+          *separator_match = malloc (2);
+          (*separator_match)[0] = *text;
+          (*separator_match)[1] = '\0';
+        }
+
+      else
+        {
+          char *p;
+
+          if (*text == '*')
+            {
+              *asterisk = strdup ("*");
+            }
+
+          p = text;
+          p += strcspn (p, "{}@,:\t.\n\f");
+          if (p > text)
+            *new_text = strndup (text, p - text);
+        }
+  }
+
+  return;
+}

Modified: trunk/tp/Texinfo/MiscXS/miscxs.h
===================================================================
--- trunk/tp/Texinfo/MiscXS/miscxs.h    2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/MiscXS/miscxs.h    2017-01-01 20:56:23 UTC (rev 7600)
@@ -2,3 +2,10 @@
 char *xs_process_text (char *text);
 HV *xs_merge_text (HV *self, HV *current, SV *text_in);
 int xs_abort_empty_line (HV *self, HV *current, SV *additional_text);
+void xs_parse_texi_regex (SV *text,
+                          char **,
+                          char **,
+                          char **,
+                          char **,
+                          char **,
+                          char **);

Modified: trunk/tp/Texinfo/Parser.pm
===================================================================
--- trunk/tp/Texinfo/Parser.pm  2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/Parser.pm  2017-01-01 20:56:23 UTC (rev 7600)
@@ -3515,6 +3515,37 @@
       if (defined($marked_as_invalid_command));
   }
 }
+
+UNITCHECK {
+  Texinfo::XSLoader::override ("Texinfo::Parser::_parse_texi_regex",
+    "Texinfo::MiscXS::parse_texi_regex");
+}
+
+# This combines several regular expressions used in '_parse_texi' to
+# look at what is next on the remaining part of the line.
+sub _parse_texi_regex {
+  my ($line) = @_;
+
+  my ($at_command, $open_brace, $asterisk, $single_letter_command,
+      $separator_match, $misc_text)
+    = ($line =~ /^\@([[:alnum:]][[:alnum:]-]*)
+                |^(\{)
+                |^(\*)
+                |^\@(["'~\@\}\{,\.!\?\s\*\-\^`=:\|\/\\])
+                |^([{}@,:\t.\f])
+                |^([^{}@,:\t.\n\f]+)
+                /x);
+
+  if ($open_brace) {
+    $separator_match = $open_brace;
+  } elsif ($asterisk) {
+    ($misc_text) = ($line =~ /^([^{}@,:\t.\n\f]+)/);
+  }
+
+  return ($at_command, $open_brace, $asterisk, $single_letter_command,
+    $separator_match, $misc_text);
+}
+
 # the different types
 #c 'menu_entry'
 #c 'menu_entry'
@@ -3776,13 +3807,13 @@
       # handle user defined macros before anything else since
       # their expansion may lead to changes in the line
       # REMACRO
-      my $at_command = undef;
       my $at_command_length;
-      if ($line =~ /^\@([[:alnum:]][[:alnum:]-]*)/g) {
-        $at_command = $1;
-        # Get length with pos instead of length($1) for efficiency
-        $at_command_length = pos($line);
-        pos($line) = 0;
+      
+      my ($at_command, $open_brace, $asterisk, $single_letter_command,
+        $separator_match, $misc_text) = _parse_texi_regex ($line);
+
+      if ($at_command) {
+        $at_command_length = length($at_command) + 1;
       }
       if ($at_command
             and ($self->{'macros'}->{$at_command} 
@@ -3877,7 +3908,7 @@
       } elsif ($current->{'cmdname'} and 
           (defined($brace_commands{$current->{'cmdname'}}) or 
             $self->{'definfoenclose'}->{$current->{'cmdname'}})
-           and $line !~ /^{/) {
+          and !$open_brace) {
         # special case for @-command as argument of @itemize or @*table.
         if (_command_with_command_as_argument($current->{'parent'})) {
           print STDERR "FOR PARENT \@$current->{'parent'}->{'parent'}->{'cmdname'} command_as_argument $current->{'cmdname'}\n" if ($self->{'DEBUG'});
@@ -3946,7 +3977,7 @@
                 and $current->{'parent'}->{'type'} 
                 and ($current->{'parent'}->{'type'} eq 'menu_comment'
                      or $current->{'parent'}->{'type'} eq 'menu_entry_description')
-                and $line =~ /^\*/
+                and $asterisk
                 and @{$current->{'contents'}} 
                 and $current->{'contents'}->[-1]->{'type'}
                 and $current->{'contents'}->[-1]->{'type'} eq 'empty_line'
@@ -4050,11 +4081,11 @@
           $current = _enter_menu_entry_node($self, $current, $line_nr);
         }
         # REMACRO
-      } elsif ($at_command
-               or $line =~ s/^\@(["'~\@\}\{,\.!\?\s\*\-\^`=:\|\/\\])//o) {
+      } elsif ($at_command or $single_letter_command) {
         my $command;
         if (!$at_command) {
-          $command = $1;
+          $command = $single_letter_command;
+          substr($line, 0, 2) = '';
         } else {
           $command = $at_command;
           substr($line, 0, $at_command_length) = '';
@@ -4815,9 +4846,9 @@
           $self->line_error(sprintf($self->__("unknown command `%s'"), 
                                       $command), $line_nr);
         }
-
-      } elsif ($line =~ s/^([{}@,:\t.\f])//) {
-        my $separator = $1;
+      } elsif ($separator_match) {
+        my $separator = $separator_match;
+        substr ($line, 0, 1) = '';
         print STDERR "SEPARATOR: $separator\n" if ($self->{'DEBUG'});
         if ($separator eq '@') {
           # this may happen with a @ at the very end of a file, therefore
@@ -5341,8 +5372,9 @@
           $current = _merge_text($self, $current, $separator);
         }
       # Misc text except end of line
-      } elsif ($line =~ s/^([^{}@,:\t.\n\f]+)//) {
-        my $new_text = $1;
+      } elsif (defined $misc_text) {
+        my $new_text = $misc_text;
+        substr ($line, 0, length ($misc_text)) = '';
         $current = _merge_text($self, $current, $new_text);
       # end of line
       } else {




reply via email to

[Prev in Thread] Current Thread [Next in Thread]