[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[7600] xs_parse_texi_regex
From: |
gavinsmith0123 |
Subject: |
[7600] xs_parse_texi_regex |
Date: |
Sun, 1 Jan 2017 20:56:24 +0000 (UTC) |
Revision: 7600
http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=7600
Author: gavin
Date: 2017-01-01 20:56:23 +0000 (Sun, 01 Jan 2017)
Log Message:
-----------
xs_parse_texi_regex
Modified Paths:
--------------
trunk/ChangeLog
trunk/tp/Texinfo/MiscXS/MiscXS.xs
trunk/tp/Texinfo/MiscXS/miscxs.c
trunk/tp/Texinfo/MiscXS/miscxs.h
trunk/tp/Texinfo/Parser.pm
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/ChangeLog 2017-01-01 20:56:23 UTC (rev 7600)
@@ -1,5 +1,15 @@
2017-01-01 Gavin Smith <address@hidden>
+ * tp/Texinfo/Parser.pm (_parse_texi, _parse_texi_regex): Split
+ out several regexes used at the start of the remaining part of
+ the line.
+ * tp/Texinfo/MiscXS/MiscXS.xs,
+ * tp/Texinfo/MiscXS/miscxs.c (xs_parse_texi_regex): New function.
+ * tp/Texinfo/Parser.pm: Add a 'UNITCHECK' section to override
+ '_parse_texi_regex'.
+
+2017-01-01 Gavin Smith <address@hidden>
+
* tp/t/automatic_nodes.t: Use the "Texinfo::ModulePath" module.
2017-01-01 Gavin Smith <address@hidden>
Modified: trunk/tp/Texinfo/MiscXS/MiscXS.xs
===================================================================
--- trunk/tp/Texinfo/MiscXS/MiscXS.xs 2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/MiscXS/MiscXS.xs 2017-01-01 20:56:23 UTC (rev 7600)
@@ -99,3 +99,36 @@
OUTPUT:
RETVAL
+
+void
+xs_parse_texi_regex (text)
+ SV *text
+ PREINIT:
+ char *at_command;
+ char *open_brace;
+ char *asterisk;
+ char *single_letter_command;
+ char *separator_match;
+ char *new_text;
+ PPCODE:
+ xs_parse_texi_regex(text, &at_command, &open_brace, &asterisk,
+ &single_letter_command, &separator_match, &new_text);
+ EXTEND(SP,6);
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(0), at_command);
+ SvUTF8_on(ST(0));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(1), open_brace);
+ SvUTF8_on(ST(1));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(2), asterisk);
+ SvUTF8_on(ST(2));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(3), single_letter_command);
+ SvUTF8_on(ST(3));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(4), separator_match);
+ SvUTF8_on(ST(4));
+ PUSHs(sv_newmortal());
+ sv_setpv((SV*)ST(5), new_text);
+ SvUTF8_on(ST(5));
Modified: trunk/tp/Texinfo/MiscXS/miscxs.c
===================================================================
--- trunk/tp/Texinfo/MiscXS/miscxs.c 2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/MiscXS/miscxs.c 2017-01-01 20:56:23 UTC (rev 7600)
@@ -553,3 +553,80 @@
new[new_len] = '\0';
return new;
}
+
+/* Look at what is at the start of TEXT_IN and report it through the
+   output parameters, which correspond to the Perl list
+     ($at_command, $open_brace, $asterisk, $single_letter_command,
+      $separator_match, $new_text)
+   returned by _parse_texi_regex in Parser.pm.
+
+   Every output is first set to NULL.  The outputs that match are set to
+   newly allocated strings (strdup/strndup); the caller owns them and is
+   responsible for freeing them.  If an allocation fails the output
+   stays NULL, i.e. it looks like "no match".
+
+   TEXT_IN is upgraded to UTF-8 in place if it is not already UTF-8. */
+void xs_parse_texi_regex (SV *text_in,
+                          char **at_command,
+                          char **open_brace,
+                          char **asterisk,
+                          char **single_letter_command,
+                          char **separator_match,
+                          char **new_text)
+{
+  char *text;
+
+  dTHX;
+
+  /* Make sure the input is in UTF8. */
+  if (!SvUTF8 (text_in))
+    sv_utf8_upgrade (text_in);
+  text = SvPV_nolen (text_in);
+
+  *at_command = *open_brace = *asterisk = *single_letter_command
+    = *separator_match = *new_text = 0;
+
+  /* The <ctype.h> functions require a value representable as unsigned
+     char; UTF-8 bytes >= 0x80 are negative where char is signed, so
+     cast before calling isalnum. */
+  if (*text == '@' && isalnum ((unsigned char) text[1]))
+    {
+      /* @-command name: an alphanumeric followed by alphanumerics, '-'
+         or '_'.
+         NOTE(review): '_' is accepted here but not by the regex in
+         Parser.pm's _parse_texi_regex -- confirm which is intended. */
+      char *p, *q;
+
+      p = text + 1;
+      q = text + 2;
+      while (isalnum ((unsigned char) *q) || *q == '-' || *q == '_')
+        q++;
+      *at_command = strndup (p, q - p);
+    }
+  else if (*text == '{')
+    {
+      /* An opening brace is reported both on its own and as a
+         separator. */
+      *open_brace = strdup ("{");
+      *separator_match = strdup ("{");
+    }
+  /* '@' followed by one of the single-character command names.  The
+     set mirrors the character class of the Perl regex; '(' and '['
+     belong to that regex's syntax, not to the set, so they are not
+     listed.  The text[1] test keeps strchr from matching the string
+     terminator. */
+  else if (*text == '@'
+           && text[1] && strchr ("\"'~@}{,.!?"
+                                 " \f\n\r\t"
+                                 "*-^`=:|/\\",
+                                 text[1]))
+    {
+      *single_letter_command = strndup (text + 1, 1);
+    }
+  /* A separator character.  Test *text first: strchr would otherwise
+     find the terminating null of the set when TEXT is empty. */
+  else if (*text && strchr ("{}@,:\t.\f", *text))
+    {
+      *separator_match = strndup (text, 1);
+    }
+  else
+    {
+      char *p;
+
+      /* A leading asterisk is flagged, and is also part of the
+         ordinary text returned below. */
+      if (*text == '*')
+        {
+          *asterisk = strdup ("*");
+        }
+
+      /* Ordinary text: everything up to the next separator or end of
+         line. */
+      p = text;
+      p += strcspn (p, "{}@,:\t.\n\f");
+      if (p > text)
+        *new_text = strndup (text, p - text);
+    }
+
+  return;
+}
Modified: trunk/tp/Texinfo/MiscXS/miscxs.h
===================================================================
--- trunk/tp/Texinfo/MiscXS/miscxs.h 2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/MiscXS/miscxs.h 2017-01-01 20:56:23 UTC (rev 7600)
@@ -2,3 +2,10 @@
char *xs_process_text (char *text);
HV *xs_merge_text (HV *self, HV *current, SV *text_in);
int xs_abort_empty_line (HV *self, HV *current, SV *additional_text);
+/* Classify the start of TEXT; each output is set to NULL or to a newly
+   allocated string. */
+void xs_parse_texi_regex (SV *text,
+                          char **at_command,
+                          char **open_brace,
+                          char **asterisk,
+                          char **single_letter_command,
+                          char **separator_match,
+                          char **new_text);
Modified: trunk/tp/Texinfo/Parser.pm
===================================================================
--- trunk/tp/Texinfo/Parser.pm 2017-01-01 15:18:42 UTC (rev 7599)
+++ trunk/tp/Texinfo/Parser.pm 2017-01-01 20:56:23 UTC (rev 7600)
@@ -3515,6 +3515,37 @@
if (defined($marked_as_invalid_command));
}
}
+
+# Once this file has been compiled, ask the XS loader to override
+# '_parse_texi_regex' (defined below) with the XS implementation.
+UNITCHECK {
+  Texinfo::XSLoader::override(
+    "Texinfo::Parser::_parse_texi_regex",
+    "Texinfo::MiscXS::parse_texi_regex"
+  );
+}
+
+# This combines several regular expressions used in '_parse_texi' to
+# look at what is next on the remaining part of the line.
+#
+# Argument: the remaining text of the line.
+# Returns the list
+#   ($at_command, $open_brace, $asterisk, $single_letter_command,
+#    $separator_match, $misc_text)
+# where an element is undef if the corresponding alternative did not
+# match:
+#   $at_command             name following '@' (alphanumerics and '-')
+#   $open_brace             '{' at the start of the line
+#   $asterisk               '*' at the start of the line
+#   $single_letter_command  the character following '@' for the
+#                           one-character commands
+#   $separator_match        a separator character; also set to '{'
+#                           when $open_brace is set
+#   $misc_text              leading text up to the next separator or
+#                           end of line; also set when $asterisk is set
+sub _parse_texi_regex {
+  my ($line) = @_;
+
+  my ($at_command, $open_brace, $asterisk, $single_letter_command,
+    $separator_match, $misc_text)
+    = ($line =~ /^\@([[:alnum:]][[:alnum:]-]*)
+                |^(\{)
+                |^(\*)
+                |^\@(["'~\@\}\{,\.!\?\s\*\-\^`=:\|\/\\])
+                |^([{}@,:\t.\f])
+                |^([^{}@,:\t.\n\f]+)
+                /x);
+
+  if ($open_brace) {
+    # An opening brace is also a separator.
+    $separator_match = $open_brace;
+  } elsif ($asterisk) {
+    # The asterisk alternative captured only '*'; get the whole run of
+    # ordinary text that it starts.
+    ($misc_text) = ($line =~ /^([^{}@,:\t.\n\f]+)/);
+  }
+
+  return ($at_command, $open_brace, $asterisk, $single_letter_command,
+    $separator_match, $misc_text);
+}
+
# the different types
#c 'menu_entry'
#c 'menu_entry'
@@ -3776,13 +3807,13 @@
# handle user defined macros before anything else since
# their expansion may lead to changes in the line
# REMACRO
- my $at_command = undef;
my $at_command_length;
- if ($line =~ /^\@([[:alnum:]][[:alnum:]-]*)/g) {
- $at_command = $1;
- # Get length with pos instead of length($1) for efficiency
- $at_command_length = pos($line);
- pos($line) = 0;
+
+ my ($at_command, $open_brace, $asterisk, $single_letter_command,
+ $separator_match, $misc_text) = _parse_texi_regex ($line);
+
+ if ($at_command) {
+ $at_command_length = length($at_command) + 1;
}
if ($at_command
and ($self->{'macros'}->{$at_command}
@@ -3877,7 +3908,7 @@
} elsif ($current->{'cmdname'} and
(defined($brace_commands{$current->{'cmdname'}}) or
$self->{'definfoenclose'}->{$current->{'cmdname'}})
- and $line !~ /^{/) {
+ and !$open_brace) {
# special case for @-command as argument of @itemize or @*table.
if (_command_with_command_as_argument($current->{'parent'})) {
print STDERR "FOR PARENT
address@hidden>{'parent'}->{'parent'}->{'cmdname'} command_as_argument
$current->{'cmdname'}\n" if ($self->{'DEBUG'});
@@ -3946,7 +3977,7 @@
and $current->{'parent'}->{'type'}
and ($current->{'parent'}->{'type'} eq 'menu_comment'
or $current->{'parent'}->{'type'} eq
'menu_entry_description')
- and $line =~ /^\*/
+ and $asterisk
and @{$current->{'contents'}}
and $current->{'contents'}->[-1]->{'type'}
and $current->{'contents'}->[-1]->{'type'} eq 'empty_line'
@@ -4050,11 +4081,11 @@
$current = _enter_menu_entry_node($self, $current, $line_nr);
}
# REMACRO
- } elsif ($at_command
- or $line =~
s/^\@(["'address@hidden,\.!\?\s\*\-\^`=:\|\/\\])//o) {
+ } elsif ($at_command or $single_letter_command) {
my $command;
if (!$at_command) {
- $command = $1;
+ $command = $single_letter_command;
+ substr($line, 0, 2) = '';
} else {
$command = $at_command;
substr($line, 0, $at_command_length) = '';
@@ -4815,9 +4846,9 @@
$self->line_error(sprintf($self->__("unknown command `%s'"),
$command), $line_nr);
}
-
- } elsif ($line =~ s/^([{}@,:\t.\f])//) {
- my $separator = $1;
+ } elsif ($separator_match) {
+ my $separator = $separator_match;
+ substr ($line, 0, 1) = '';
print STDERR "SEPARATOR: $separator\n" if ($self->{'DEBUG'});
if ($separator eq '@') {
# this may happen with a @ at the very end of a file, therefore
@@ -5341,8 +5372,9 @@
$current = _merge_text($self, $current, $separator);
}
# Misc text except end of line
- } elsif ($line =~ s/^([^{}@,:\t.\n\f]+)//) {
- my $new_text = $1;
+ } elsif (defined $misc_text) {
+ my $new_text = $misc_text;
+ substr ($line, 0, length ($misc_text)) = '';
$current = _merge_text($self, $current, $new_text);
# end of line
} else {
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [7600] xs_parse_texi_regex,
gavinsmith0123 <=