[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: reformatting dfa.c, now is the time
From: Jim Meyering
Subject: Re: reformatting dfa.c, now is the time
Date: Thu, 01 Mar 2012 19:30:30 +0100
Aharon Robbins wrote:
...
> Go for it. I'm carrying a few patches but I can merge them in again
> after doing a pull. They won't go into gawk 4.0.1 anyway.
Thanks for the quick reply.
Here's the proposed big-and-automatic patch, followed by a small,
manual fix-up of some code in cpp definitions, albeit not all.
E.g., I left some cuddled braces.
The HOME=. is to avoid using my ~/.indent.pro file.
>From 565fd35b0630c015cd4ac59dd164c1d1eb75cbf6 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Thu, 1 Mar 2012 17:19:19 +0100
Subject: [PATCH 1/2] maint: indent dfa.c
* src/dfa.c: Filter through indent like this:
HOME=. indent -Tsize_t -l79 --leave-preprocessor-space \
--dont-format-comments --no-tabs dfa.c > k && mv k dfa.c
---
src/dfa.c | 1585 +++++++++++++++++++++++++++++++------------------------------
1 file changed, 801 insertions(+), 784 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 4c9071f..ecceb3b 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -46,7 +46,7 @@
#include "gettext.h"
#define _(str) gettext (str)
-#include "mbsupport.h" /* defines MBS_SUPPORT if appropriate */
+#include "mbsupport.h" /* defines MBS_SUPPORT if appropriate */
#include <wchar.h>
#include <wctype.h>
@@ -89,7 +89,11 @@ typedef int charclass[CHARCLASS_INTS];
/* Convert a possibly-signed character to an unsigned character. This is
a bit safer than casting to unsigned char, since it catches some type
errors that the cast doesn't. */
-static inline unsigned char to_uchar (char ch) { return ch; }
+static inline unsigned char
+to_uchar (char ch)
+{
+ return ch;
+}
/* Contexts tell us whether a character is a newline or a word constituent.
Word-constituent characters are those that satisfy iswalnum(), plus '_'.
@@ -163,7 +167,7 @@ typedef ptrdiff_t token;
/* Predefined token values. */
enum
{
- END = -1, /* END is a terminal symbol that matches the
+ END = -1, /* END is a terminal symbol that matches the
end of input; any value of END or less in
the parse tree is such a symbol. Accepting
states of the DFA are those that would have
@@ -171,108 +175,107 @@ enum
/* Ordinary character values are terminal symbols that match themselves. */
- EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
+ EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
the empty string. */
- BACKREF, /* BACKREF is generated by \<digit>; it
+ BACKREF, /* BACKREF is generated by \<digit>; it
is not completely handled. If the scanner
detects a transition on backref, it returns
a kind of "semi-success" indicating that
the match will have to be verified with
a backtracking matcher. */
- BEGLINE, /* BEGLINE is a terminal symbol that matches
+ BEGLINE, /* BEGLINE is a terminal symbol that matches
the empty string if it is at the beginning
of a line. */
- ENDLINE, /* ENDLINE is a terminal symbol that matches
+ ENDLINE, /* ENDLINE is a terminal symbol that matches
the empty string if it is at the end of
a line. */
- BEGWORD, /* BEGWORD is a terminal symbol that matches
+ BEGWORD, /* BEGWORD is a terminal symbol that matches
the empty string if it is at the beginning
of a word. */
- ENDWORD, /* ENDWORD is a terminal symbol that matches
+ ENDWORD, /* ENDWORD is a terminal symbol that matches
the empty string if it is at the end of
a word. */
- LIMWORD, /* LIMWORD is a terminal symbol that matches
+ LIMWORD, /* LIMWORD is a terminal symbol that matches
the empty string if it is at the beginning
or the end of a word. */
- NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
+ NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
matches the empty string if it is not at
the beginning or end of a word. */
- QMARK, /* QMARK is an operator of one argument that
+ QMARK, /* QMARK is an operator of one argument that
matches zero or one occurences of its
argument. */
- STAR, /* STAR is an operator of one argument that
+ STAR, /* STAR is an operator of one argument that
matches the Kleene closure (zero or more
occurrences) of its argument. */
- PLUS, /* PLUS is an operator of one argument that
+ PLUS, /* PLUS is an operator of one argument that
matches the positive closure (one or more
occurrences) of its argument. */
- REPMN, /* REPMN is a lexical token corresponding
+ REPMN, /* REPMN is a lexical token corresponding
to the {m,n} construct. REPMN never
appears in the compiled token vector. */
- CAT, /* CAT is an operator of two arguments that
+ CAT, /* CAT is an operator of two arguments that
matches the concatenation of its
arguments. CAT is never returned by the
lexical analyzer. */
- OR, /* OR is an operator of two arguments that
+ OR, /* OR is an operator of two arguments that
matches either of its arguments. */
- LPAREN, /* LPAREN never appears in the parse tree,
+ LPAREN, /* LPAREN never appears in the parse tree,
it is only a lexeme. */
- RPAREN, /* RPAREN never appears in the parse tree. */
+ RPAREN, /* RPAREN never appears in the parse tree. */
- ANYCHAR, /* ANYCHAR is a terminal symbol that matches
- any multibyte (or single byte) characters.
- It is used only if MB_CUR_MAX > 1. */
+ ANYCHAR, /* ANYCHAR is a terminal symbol that matches
+ any multibyte (or single byte) characters.
+ It is used only if MB_CUR_MAX > 1. */
- MBCSET, /* MBCSET is similar to CSET, but for
+ MBCSET, /* MBCSET is similar to CSET, but for
multibyte characters. */
- WCHAR, /* Only returned by lex. wctok contains
+ WCHAR, /* Only returned by lex. wctok contains
the wide character representation. */
- CSET /* CSET and (and any value greater) is a
+ CSET /* CSET and (and any value greater) is a
terminal symbol that matches any of a
class of characters. */
};
-
/* States of the recognizer correspond to sets of positions in the parse
tree, together with the constraints under which they may be matched.
So a position is encoded as an index into the parse tree together with
a constraint. */
typedef struct
{
- size_t index; /* Index into the parse array. */
- unsigned int constraint; /* Constraint for matching this position. */
+ size_t index; /* Index into the parse array. */
+ unsigned int constraint; /* Constraint for matching this position. */
} position;
/* Sets of positions are stored as arrays. */
typedef struct
{
- position *elems; /* Elements of this position set. */
- size_t nelem; /* Number of elements in this set. */
- size_t alloc; /* Number of elements allocated in ELEMS. */
+ position *elems; /* Elements of this position set. */
+ size_t nelem; /* Number of elements in this set. */
+ size_t alloc; /* Number of elements allocated in ELEMS. */
} position_set;
/* Sets of leaves are also stored as arrays. */
typedef struct
{
- size_t *elems; /* Elements of this position set. */
- size_t nelem; /* Number of elements in this set. */
+ size_t *elems; /* Elements of this position set. */
+ size_t nelem; /* Number of elements in this set. */
} leaf_set;
/* A state of the dfa consists of a set of positions, some flags,
@@ -280,16 +283,16 @@ typedef struct
contains an END token. */
typedef struct
{
- size_t hash; /* Hash of the positions of this state. */
- position_set elems; /* Positions this state could match. */
- unsigned char context; /* Context from previous state. */
- char backref; /* True if this state matches a \<digit>. */
- unsigned short constraint; /* Constraint for this state to accept. */
- token first_end; /* Token value of the first END in elems. */
- position_set mbps; /* Positions which can match multibyte
- characters. e.g. period.
- These staff are used only if
- MB_CUR_MAX > 1. */
+ size_t hash; /* Hash of the positions of this state. */
+ position_set elems; /* Positions this state could match. */
+ unsigned char context; /* Context from previous state. */
+ char backref; /* True if this state matches a \<digit>. */
+ unsigned short constraint; /* Constraint for this state to accept. */
+ token first_end; /* Token value of the first END in elems. */
+ position_set mbps; /* Positions which can match multibyte
+ characters. e.g. period.
+ These staff are used only if
+ MB_CUR_MAX > 1. */
} dfa_state;
/* States are indexed by state_num values. These are normally
@@ -302,59 +305,59 @@ struct mb_char_classes
{
ptrdiff_t cset;
int invert;
- wchar_t *chars; /* Normal characters. */
+ wchar_t *chars; /* Normal characters. */
size_t nchars;
- wctype_t *ch_classes; /* Character classes. */
+ wctype_t *ch_classes; /* Character classes. */
size_t nch_classes;
- wchar_t *range_sts; /* Range characters (start of the range). */
- wchar_t *range_ends; /* Range characters (end of the range). */
+ wchar_t *range_sts; /* Range characters (start of the range). */
+ wchar_t *range_ends; /* Range characters (end of the range). */
size_t nranges;
- char **equivs; /* Equivalent classes. */
+ char **equivs; /* Equivalent classes. */
size_t nequivs;
char **coll_elems;
- size_t ncoll_elems; /* Collating elements. */
+ size_t ncoll_elems; /* Collating elements. */
};
/* A compiled regular expression. */
struct dfa
{
/* Fields filled by the scanner. */
- charclass *charclasses; /* Array of character sets for CSET tokens. */
- size_t cindex; /* Index for adding new charclasses. */
- size_t calloc; /* Number of charclasses currently allocated. */
+ charclass *charclasses; /* Array of character sets for CSET tokens. */
+ size_t cindex; /* Index for adding new charclasses. */
+ size_t calloc; /* Number of charclasses currently allocated. */
/* Fields filled by the parser. */
- token *tokens; /* Postfix parse array. */
- size_t tindex; /* Index for adding new tokens. */
- size_t talloc; /* Number of tokens currently allocated. */
- size_t depth; /* Depth required of an evaluation stack
+ token *tokens; /* Postfix parse array. */
+ size_t tindex; /* Index for adding new tokens. */
+ size_t talloc; /* Number of tokens currently allocated. */
+ size_t depth; /* Depth required of an evaluation stack
used for depth-first traversal of the
parse tree. */
- size_t nleaves; /* Number of leaves on the parse tree. */
- size_t nregexps; /* Count of parallel regexps being built
+ size_t nleaves; /* Number of leaves on the parse tree. */
+ size_t nregexps; /* Count of parallel regexps being built
with dfaparse(). */
- unsigned int mb_cur_max; /* Cached value of MB_CUR_MAX. */
- token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
+ unsigned int mb_cur_max; /* Cached value of MB_CUR_MAX. */
+ token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
/* The following are used only if MB_CUR_MAX > 1. */
/* The value of multibyte_prop[i] is defined by following rule.
- if tokens[i] < NOTCHAR
- bit 0 : tokens[i] is the first byte of a character, including
- single-byte characters.
- bit 1 : tokens[i] is the last byte of a character, including
- single-byte characters.
+ if tokens[i] < NOTCHAR
+ bit 0 : tokens[i] is the first byte of a character, including
+ single-byte characters.
+ bit 1 : tokens[i] is the last byte of a character, including
+ single-byte characters.
- if tokens[i] = MBCSET
- ("the index of mbcsets correspnd to this operator" << 2) + 3
+ if tokens[i] = MBCSET
+ ("the index of mbcsets correspnd to this operator" << 2) + 3
e.g.
tokens
- = 'single_byte_a', 'multi_byte_A', single_byte_b'
- = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
+ = 'single_byte_a', 'multi_byte_A', single_byte_b'
+ = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
multibyte_prop
- = 3 , 1 , 0 , 2 , 3
- */
+ = 3 , 1 , 0 , 2 , 3
+ */
size_t nmultibyte_prop;
int *multibyte_prop;
@@ -364,19 +367,19 @@ struct dfa
size_t mbcsets_alloc;
/* Fields filled by the state builder. */
- dfa_state *states; /* States of the dfa. */
- state_num sindex; /* Index for adding new states. */
- state_num salloc; /* Number of states currently allocated. */
+ dfa_state *states; /* States of the dfa. */
+ state_num sindex; /* Index for adding new states. */
+ state_num salloc; /* Number of states currently allocated. */
/* Fields filled by the parse tree->NFA conversion. */
- position_set *follows; /* Array of follow sets, indexed by position
+ position_set *follows; /* Array of follow sets, indexed by position
index. The follow of a position is the set
of positions containing characters that
could conceivably follow a character
matching the given position in a string
matching the regexp. Allocated to the
maximum possible position index. */
- int searchflag; /* True if we are supposed to build a searching
+ int searchflag; /* True if we are supposed to build a searching
as opposed to an exact matcher. A searching
matcher finds the first and shortest string
matching a regexp anywhere in the buffer,
@@ -385,29 +388,29 @@ struct dfa
beginning of the buffer. */
/* Fields filled by dfaexec. */
- state_num tralloc; /* Number of transition tables that have
+ state_num tralloc; /* Number of transition tables that have
slots so far. */
- int trcount; /* Number of transition tables that have
+ int trcount; /* Number of transition tables that have
actually been built. */
- state_num **trans; /* Transition tables for states that can
+ state_num **trans; /* Transition tables for states that can
never accept. If the transitions for a
state have not yet been computed, or the
state could possibly accept, its entry in
this table is NULL. */
- state_num **realtrans; /* Trans always points to realtrans + 1; this
+ state_num **realtrans; /* Trans always points to realtrans + 1; this
is so trans[-1] can contain NULL. */
- state_num **fails; /* Transition tables after failing to accept
+ state_num **fails; /* Transition tables after failing to accept
on a state that potentially could do so. */
- int *success; /* Table of acceptance conditions used in
+ int *success; /* Table of acceptance conditions used in
dfaexec and computed in build_state. */
- state_num *newlines; /* Transitions on newlines. The entry for a
+ state_num *newlines; /* Transitions on newlines. The entry for a
newline in any transition table is always
-1 so we can count lines without wasting
too many cycles. The transition for a
newline is stored separately and handled
as a special case. Newline is also used
as a sentinel at the end of the buffer. */
- struct dfamust *musts; /* List of strings, at least one of which
+ struct dfamust *musts; /* List of strings, at least one of which
is known to appear in any r.e. matching
the dfa. */
};
@@ -459,7 +462,6 @@ static void regexp (void);
} \
while (false)
-
#ifdef DEBUG
static void
@@ -468,36 +470,72 @@ prtok (token t)
char const *s;
if (t < 0)
- fprintf(stderr, "END");
+ fprintf (stderr, "END");
else if (t < NOTCHAR)
{
int ch = t;
- fprintf(stderr, "%c", ch);
+ fprintf (stderr, "%c", ch);
}
else
{
switch (t)
{
- case EMPTY: s = "EMPTY"; break;
- case BACKREF: s = "BACKREF"; break;
- case BEGLINE: s = "BEGLINE"; break;
- case ENDLINE: s = "ENDLINE"; break;
- case BEGWORD: s = "BEGWORD"; break;
- case ENDWORD: s = "ENDWORD"; break;
- case LIMWORD: s = "LIMWORD"; break;
- case NOTLIMWORD: s = "NOTLIMWORD"; break;
- case QMARK: s = "QMARK"; break;
- case STAR: s = "STAR"; break;
- case PLUS: s = "PLUS"; break;
- case CAT: s = "CAT"; break;
- case OR: s = "OR"; break;
- case LPAREN: s = "LPAREN"; break;
- case RPAREN: s = "RPAREN"; break;
- case ANYCHAR: s = "ANYCHAR"; break;
- case MBCSET: s = "MBCSET"; break;
- default: s = "CSET"; break;
+ case EMPTY:
+ s = "EMPTY";
+ break;
+ case BACKREF:
+ s = "BACKREF";
+ break;
+ case BEGLINE:
+ s = "BEGLINE";
+ break;
+ case ENDLINE:
+ s = "ENDLINE";
+ break;
+ case BEGWORD:
+ s = "BEGWORD";
+ break;
+ case ENDWORD:
+ s = "ENDWORD";
+ break;
+ case LIMWORD:
+ s = "LIMWORD";
+ break;
+ case NOTLIMWORD:
+ s = "NOTLIMWORD";
+ break;
+ case QMARK:
+ s = "QMARK";
+ break;
+ case STAR:
+ s = "STAR";
+ break;
+ case PLUS:
+ s = "PLUS";
+ break;
+ case CAT:
+ s = "CAT";
+ break;
+ case OR:
+ s = "OR";
+ break;
+ case LPAREN:
+ s = "LPAREN";
+ break;
+ case RPAREN:
+ s = "RPAREN";
+ break;
+ case ANYCHAR:
+ s = "ANYCHAR";
+ break;
+ case MBCSET:
+ s = "MBCSET";
+ break;
+ default:
+ s = "CSET";
+ break;
}
- fprintf(stderr, "%s", s);
+ fprintf (stderr, "%s", s);
}
}
#endif /* DEBUG */
@@ -559,11 +597,11 @@ charclass_index (charclass const s)
size_t i;
for (i = 0; i < dfa->cindex; ++i)
- if (equal(s, dfa->charclasses[i]))
+ if (equal (s, dfa->charclasses[i]))
return i;
- REALLOC_IF_NECESSARY(dfa->charclasses, dfa->calloc, dfa->cindex + 1);
+ REALLOC_IF_NECESSARY (dfa->charclasses, dfa->calloc, dfa->cindex + 1);
++dfa->cindex;
- copyset(s, dfa->charclasses[i]);
+ copyset (s, dfa->charclasses[i]);
return i;
}
@@ -609,9 +647,9 @@ char_context (unsigned char c)
}
static int
-wchar_context(wint_t wc)
+wchar_context (wint_t wc)
{
- if (wc == (wchar_t)eolbyte || wc == 0)
+ if (wc == (wchar_t) eolbyte || wc == 0)
return CTX_NEWLINE;
if (wc == L'_' || iswalnum (wc))
return CTX_LETTER;
@@ -677,8 +715,7 @@ static inline bool
setbit_wc (wint_t wc, charclass c)
{
abort ();
- /*NOTREACHED*/
- return false;
+ /*NOTREACHED*/ return false;
}
#endif
@@ -706,8 +743,6 @@ setbit_case_fold_c (int b, charclass c)
}
}
-
-
/* UTF-8 encoding allows some optimizations that we can't otherwise
assume in a multibyte encoding. */
static inline int
@@ -731,40 +766,39 @@ using_utf8 (void)
reader is referred to the GNU Regex documentation for the
meaning of the @address@hidden@ syntax bits. */
-static char const *lexptr; /* Pointer to next input character. */
-static size_t lexleft; /* Number of characters remaining. */
-static token lasttok; /* Previous token returned; initially END. */
-static int laststart; /* True if we're separated from beginning or (, |
+static char const *lexptr; /* Pointer to next input character. */
+static size_t lexleft; /* Number of characters remaining. */
+static token lasttok; /* Previous token returned; initially END. */
+static int laststart; /* True if we're separated from beginning or (, |
only by zero-width characters. */
-static size_t parens; /* Count of outstanding left parens. */
-static int minrep, maxrep; /* Repeat counts for {m,n}. */
-static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
+static size_t parens; /* Count of outstanding left parens. */
+static int minrep, maxrep; /* Repeat counts for {m,n}. */
+static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */
-static int cur_mb_len = 1; /* Length of the multibyte representation of
+static int cur_mb_len = 1; /* Length of the multibyte representation of
wctok. */
/* These variables are used only if (MB_CUR_MAX > 1). */
-static mbstate_t mbs; /* Mbstate for mbrlen(). */
-static wchar_t wctok; /* Wide character representation of the current
+static mbstate_t mbs; /* Mbstate for mbrlen(). */
+static wchar_t wctok; /* Wide character representation of the current
multibyte character. */
-static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
- Each element store the amount of remain
- byte of corresponding multibyte character
- in the input string. A element's value
- is 0 if corresponding character is a
- single byte chracter.
- e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
- mblen_buf : 0, 3, 2, 1
- */
-static wchar_t *inputwcs; /* Wide character representation of input
+static unsigned char *mblen_buf; /* Correspond to the input buffer in dfaexec().
+ Each element store the amount of remain
+ byte of corresponding multibyte character
+ in the input string. A element's value
+ is 0 if corresponding character is a
+ single byte chracter.
+ e.g. input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
+ mblen_buf : 0, 3, 2, 1
+ */
+static wchar_t *inputwcs; /* Wide character representation of input
string in dfaexec().
The length of this array is same as
the length of input string(char array).
inputstring[i] is a single-byte char,
or 1st byte of a multibyte char.
And inputwcs[i] is the codepoint. */
-static unsigned char const *buf_begin; /* reference to begin in dfaexec(). */
-static unsigned char const *buf_end; /* reference to end in dfaexec(). */
-
+static unsigned char const *buf_begin; /* reference to begin in dfaexec(). */
+static unsigned char const *buf_end; /* reference to end in dfaexec(). */
#if MBS_SUPPORT
/* Note that characters become unsigned here. */
@@ -827,26 +861,27 @@ typedef int predicate (int);
/* The following list maps the names of the Posix named character classes
to predicate functions that determine whether a given character is in
the class. The leading [ has already been eaten by the lexical analyzer. */
-struct dfa_ctype {
+struct dfa_ctype
+{
const char *name;
predicate *func;
bool single_byte_only;
};
static const struct dfa_ctype prednames[] = {
- { "alpha", isalpha, false },
- { "upper", isupper, false },
- { "lower", islower, false },
- { "digit", isdigit, true },
- { "xdigit", isxdigit, true },
- { "space", isspace, false },
- { "punct", ispunct, false },
- { "alnum", isalnum, false },
- { "print", isprint, false },
- { "graph", isgraph, false },
- { "cntrl", iscntrl, false },
- { "blank", isblank, false },
- { NULL, NULL, false }
+ {"alpha", isalpha, false},
+ {"upper", isupper, false},
+ {"lower", islower, false},
+ {"digit", isdigit, true},
+ {"xdigit", isxdigit, true},
+ {"space", isspace, false},
+ {"punct", ispunct, false},
+ {"alnum", isalnum, false},
+ {"print", isprint, false},
+ {"graph", isgraph, false},
+ {"cntrl", iscntrl, false},
+ {"blank", isblank, false},
+ {NULL, NULL, false}
};
static const struct dfa_ctype * _GL_ATTRIBUTE_PURE
@@ -891,7 +926,8 @@ parse_bracket_exp (void)
ch_classes_al = equivs_al = coll_elems_al = 0;
if (MB_CUR_MAX > 1)
{
- REALLOC_IF_NECESSARY(dfa->mbcsets, dfa->mbcsets_alloc, dfa->nmbcsets + 1);
+ REALLOC_IF_NECESSARY (dfa->mbcsets, dfa->mbcsets_alloc,
+ dfa->nmbcsets + 1);
/* dfa->multibyte_prop[] hold the index of dfa->mbcsets.
We will update dfa->multibyte_prop[] in addtok(), because we can't
@@ -917,7 +953,7 @@ parse_bracket_exp (void)
colon_warning_state = (c == ':');
do
{
- c1 = EOF; /* mark c1 is not initialized". */
+ c1 = EOF; /* mark c1 is not initialized". */
colon_warning_state &= ~2;
/* Note that if we're looking at some other [:...:] construct,
@@ -933,8 +969,7 @@ parse_bracket_exp (void)
/* If pattern contains `[[:', `[[.', or `[[='. */
if (c1 == ':'
/* TODO: handle `[[.' and `[[=' also for MB_CUR_MAX == 1. */
- || (MB_CUR_MAX > 1 && (c1 == '.' || c1 == '='))
- )
+ || (MB_CUR_MAX > 1 && (c1 == '.' || c1 == '=')))
{
size_t len = 0;
for (;;)
@@ -956,51 +991,48 @@ parse_bracket_exp (void)
/* build character class. */
{
char const *class
- = (case_fold && (STREQ (str, "upper")
- || STREQ (str, "lower"))
- ? "alpha"
- : str);
+ = (case_fold && (STREQ (str, "upper")
+ || STREQ (str, "lower")) ? "alpha" : str);
const struct dfa_ctype *pred = find_pred (class);
if (!pred)
- dfaerror(_("invalid character class"));
+ dfaerror (_("invalid character class"));
if (MB_CUR_MAX > 1 && !pred->single_byte_only)
{
/* Store the character class as wctype_t. */
wctype_t wt = wctype (class);
- REALLOC_IF_NECESSARY(work_mbc->ch_classes,
- ch_classes_al,
- work_mbc->nch_classes + 1);
+ REALLOC_IF_NECESSARY (work_mbc->ch_classes,
+ ch_classes_al,
+ work_mbc->nch_classes + 1);
work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
}
for (c2 = 0; c2 < NOTCHAR; ++c2)
- if (pred->func(c2))
+ if (pred->func (c2))
setbit_case_fold_c (c2, ccl);
}
else if (MBS_SUPPORT && (c1 == '=' || c1 == '.'))
{
char *elem;
- MALLOC(elem, len + 1);
- strncpy(elem, str, len + 1);
+ MALLOC (elem, len + 1);
+ strncpy (elem, str, len + 1);
if (c1 == '=')
/* build equivalent class. */
{
- REALLOC_IF_NECESSARY(work_mbc->equivs,
- equivs_al,
- work_mbc->nequivs + 1);
+ REALLOC_IF_NECESSARY (work_mbc->equivs,
+ equivs_al, work_mbc->nequivs + 1);
work_mbc->equivs[work_mbc->nequivs++] = elem;
}
if (c1 == '.')
/* build collating element. */
{
- REALLOC_IF_NECESSARY(work_mbc->coll_elems,
- coll_elems_al,
- work_mbc->ncoll_elems + 1);
+ REALLOC_IF_NECESSARY (work_mbc->coll_elems,
+ coll_elems_al,
+ work_mbc->ncoll_elems + 1);
work_mbc->coll_elems[work_mbc->ncoll_elems++] = elem;
}
}
@@ -1016,15 +1048,15 @@ parse_bracket_exp (void)
}
if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC(c, wc, _("unbalanced ["));
+ FETCH_WC (c, wc, _("unbalanced ["));
if (c1 == EOF)
- FETCH_WC(c1, wc1, _("unbalanced ["));
+ FETCH_WC (c1, wc1, _("unbalanced ["));
if (c1 == '-')
/* build range characters. */
{
- FETCH_WC(c2, wc2, _("unbalanced ["));
+ FETCH_WC (c2, wc2, _("unbalanced ["));
if (c2 == ']')
{
/* In the case [x-], the - is an ordinary hyphen,
@@ -1036,32 +1068,31 @@ parse_bracket_exp (void)
if (c1 == '-' && c2 != ']')
{
- if (c2 == '\\'
- && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC(c2, wc2, _("unbalanced ["));
+ if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH_WC (c2, wc2, _("unbalanced ["));
if (MB_CUR_MAX > 1)
{
/* When case folding map a range, say [m-z] (or even [M-z])
to the pair of ranges, [m-z] [M-Z]. */
- REALLOC_IF_NECESSARY(work_mbc->range_sts,
- range_sts_al, work_mbc->nranges + 1);
- REALLOC_IF_NECESSARY(work_mbc->range_ends,
- range_ends_al, work_mbc->nranges + 1);
+ REALLOC_IF_NECESSARY (work_mbc->range_sts,
+ range_sts_al, work_mbc->nranges + 1);
+ REALLOC_IF_NECESSARY (work_mbc->range_ends,
+ range_ends_al, work_mbc->nranges + 1);
work_mbc->range_sts[work_mbc->nranges] =
- case_fold ? towlower(wc) : (wchar_t)wc;
+ case_fold ? towlower (wc) : (wchar_t) wc;
work_mbc->range_ends[work_mbc->nranges++] =
- case_fold ? towlower(wc2) : (wchar_t)wc2;
+ case_fold ? towlower (wc2) : (wchar_t) wc2;
#ifndef GREP
- if (case_fold && (iswalpha(wc) || iswalpha(wc2)))
+ if (case_fold && (iswalpha (wc) || iswalpha (wc2)))
{
- REALLOC_IF_NECESSARY(work_mbc->range_sts,
- range_sts_al, work_mbc->nranges + 1);
- work_mbc->range_sts[work_mbc->nranges] = towupper(wc);
- REALLOC_IF_NECESSARY(work_mbc->range_ends,
- range_ends_al, work_mbc->nranges + 1);
- work_mbc->range_ends[work_mbc->nranges++] = towupper(wc2);
+ REALLOC_IF_NECESSARY (work_mbc->range_sts,
+ range_sts_al, work_mbc->nranges + 1);
+ work_mbc->range_sts[work_mbc->nranges] = towupper (wc);
+ REALLOC_IF_NECESSARY (work_mbc->range_ends,
+ range_ends_al, work_mbc->nranges + 1);
+ work_mbc->range_ends[work_mbc->nranges++] = towupper (wc2);
}
#endif
}
@@ -1096,7 +1127,7 @@ parse_bracket_exp (void)
}
colon_warning_state |= 8;
- FETCH_WC(c1, wc1, _("unbalanced ["));
+ FETCH_WC (c1, wc1, _("unbalanced ["));
continue;
}
@@ -1108,25 +1139,25 @@ parse_bracket_exp (void)
continue;
}
- if (case_fold && iswalpha(wc))
+ if (case_fold && iswalpha (wc))
{
- wc = towlower(wc);
+ wc = towlower (wc);
if (!setbit_wc (wc, ccl))
{
- REALLOC_IF_NECESSARY(work_mbc->chars, chars_al,
- work_mbc->nchars + 1);
+ REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
+ work_mbc->nchars + 1);
work_mbc->chars[work_mbc->nchars++] = wc;
}
#ifdef GREP
continue;
#else
- wc = towupper(wc);
+ wc = towupper (wc);
#endif
}
if (!setbit_wc (wc, ccl))
{
- REALLOC_IF_NECESSARY(work_mbc->chars, chars_al,
- work_mbc->nchars + 1);
+ REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
+ work_mbc->nchars + 1);
work_mbc->chars[work_mbc->nchars++] = wc;
}
}
@@ -1139,19 +1170,19 @@ parse_bracket_exp (void)
{
static charclass zeroclass;
work_mbc->invert = invert;
- work_mbc->cset = equal(ccl, zeroclass) ? -1 : charclass_index(ccl);
+ work_mbc->cset = equal (ccl, zeroclass) ? -1 : charclass_index (ccl);
return MBCSET;
}
if (invert)
{
- assert(MB_CUR_MAX == 1);
- notset(ccl);
+ assert (MB_CUR_MAX == 1);
+ notset (ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
- clrbit(eolbyte, ccl);
+ clrbit (eolbyte, ccl);
}
- return CSET + charclass_index(ccl);
+ return CSET + charclass_index (ccl);
}
static token
@@ -1173,11 +1204,11 @@ lex (void)
if (MB_CUR_MAX > 1)
{
FETCH_WC (c, wctok, NULL);
- if ((int)c == EOF)
+ if ((int) c == EOF)
goto normal_char;
}
else
- FETCH(c, NULL);
+ FETCH (c, NULL);
switch (c)
{
@@ -1185,7 +1216,7 @@ lex (void)
if (backslash)
goto normal_char;
if (lexleft == 0)
- dfaerror(_("unfinished \\ escape"));
+ dfaerror (_("unfinished \\ escape"));
backslash = 1;
break;
@@ -1193,9 +1224,7 @@ lex (void)
if (backslash)
goto normal_char;
if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
- || lasttok == END
- || lasttok == LPAREN
- || lasttok == OR)
+ || lasttok == END || lasttok == LPAREN || lasttok == OR)
return lasttok = BEGLINE;
goto normal_char;
@@ -1233,12 +1262,12 @@ lex (void)
case '`':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = BEGLINE; /* FIXME: should be beginning of string */
+ return lasttok = BEGLINE; /* FIXME: should be beginning of string */
goto normal_char;
case '\'':
if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = ENDLINE; /* FIXME: should be end of string */
+ return lasttok = ENDLINE; /* FIXME: should be end of string */
goto normal_char;
case '<':
@@ -1301,7 +1330,7 @@ lex (void)
int lo = -1, hi = -1;
char const *p = lexptr;
char const *lim = p + lexleft;
- for (; p != lim && ISASCIIDIGIT (*p); p++)
+ for (; p != lim && ISASCIIDIGIT (*p); p++)
{
if (lo < 0)
lo = *p - '0';
@@ -1326,8 +1355,7 @@ lex (void)
}
else
hi = lo;
- if (p == lim || *p != '}'
- || lo < 0 || (0 <= hi && hi < lo))
+ if (p == lim || *p != '}' || lo < 0 || (0 <= hi && hi < lo))
goto normal_char;
}
@@ -1336,24 +1364,24 @@ lex (void)
{M} - exact count
{M,} - minimum count, maximum is infinity
{M,N} - M through N */
- FETCH(c, _("unfinished repeat count"));
+ FETCH (c, _("unfinished repeat count"));
if (ISASCIIDIGIT (c))
{
minrep = c - '0';
for (;;)
{
- FETCH(c, _("unfinished repeat count"));
- if (! ISASCIIDIGIT (c))
+ FETCH (c, _("unfinished repeat count"));
+ if (!ISASCIIDIGIT (c))
break;
minrep = 10 * minrep + c - '0';
}
}
else
- dfaerror(_("malformed repeat count"));
+ dfaerror (_("malformed repeat count"));
if (c == ',')
{
FETCH (c, _("unfinished repeat count"));
- if (! ISASCIIDIGIT (c))
+ if (!ISASCIIDIGIT (c))
maxrep = -1;
else
{
@@ -1361,7 +1389,7 @@ lex (void)
for (;;)
{
FETCH (c, _("unfinished repeat count"));
- if (! ISASCIIDIGIT (c))
+ if (!ISASCIIDIGIT (c))
break;
maxrep = 10 * maxrep + c - '0';
}
@@ -1374,11 +1402,11 @@ lex (void)
if (!(syntax_bits & RE_NO_BK_BRACES))
{
if (c != '\\')
- dfaerror(_("malformed repeat count"));
- FETCH(c, _("unfinished repeat count"));
+ dfaerror (_("malformed repeat count"));
+ FETCH (c, _("unfinished repeat count"));
}
if (c != '}')
- dfaerror(_("malformed repeat count"));
+ dfaerror (_("malformed repeat count"));
laststart = 0;
return lasttok = REPMN;
@@ -1392,8 +1420,7 @@ lex (void)
case '\n':
if (syntax_bits & RE_LIMITED_OPS
- || backslash
- || !(syntax_bits & RE_NEWLINE_ALT))
+ || backslash || !(syntax_bits & RE_NEWLINE_ALT))
goto normal_char;
laststart = 1;
return lasttok = OR;
@@ -1424,46 +1451,46 @@ lex (void)
laststart = 0;
return lasttok = ANYCHAR;
}
- zeroset(ccl);
- notset(ccl);
+ zeroset (ccl);
+ notset (ccl);
if (!(syntax_bits & RE_DOT_NEWLINE))
- clrbit(eolbyte, ccl);
+ clrbit (eolbyte, ccl);
if (syntax_bits & RE_DOT_NOT_NULL)
- clrbit('\0', ccl);
+ clrbit ('\0', ccl);
laststart = 0;
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
case 's':
case 'S':
if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
- zeroset(ccl);
+ zeroset (ccl);
for (c2 = 0; c2 < NOTCHAR; ++c2)
- if (isspace(c2))
- setbit(c2, ccl);
+ if (isspace (c2))
+ setbit (c2, ccl);
if (c == 'S')
- notset(ccl);
+ notset (ccl);
laststart = 0;
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
case 'w':
case 'W':
if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
- zeroset(ccl);
+ zeroset (ccl);
for (c2 = 0; c2 < NOTCHAR; ++c2)
- if (IS_WORD_CONSTITUENT(c2))
- setbit(c2, ccl);
+ if (IS_WORD_CONSTITUENT (c2))
+ setbit (c2, ccl);
if (c == 'W')
- notset(ccl);
+ notset (ccl);
laststart = 0;
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
case '[':
if (backslash)
goto normal_char;
laststart = 0;
- return lasttok = parse_bracket_exp();
+ return lasttok = parse_bracket_exp ();
default:
normal_char:
@@ -1473,11 +1500,11 @@ lex (void)
if (MB_CUR_MAX > 1)
return lasttok = WCHAR;
- if (case_fold && isalpha(c))
+ if (case_fold && isalpha (c))
{
- zeroset(ccl);
+ zeroset (ccl);
setbit_case_fold_c (c, ccl);
- return lasttok = CSET + charclass_index(ccl);
+ return lasttok = CSET + charclass_index (ccl);
}
return lasttok = c;
@@ -1486,14 +1513,14 @@ lex (void)
/* The above loop should consume at most a backslash
and some other character. */
- abort();
- return END; /* keeps pedantic compilers happy. */
+ abort ();
+ return END; /* keeps pedantic compilers happy. */
}
/* Recursive descent parser for regular expressions. */
-static token tok; /* Lookahead token. */
-static size_t depth; /* Current depth of a hypothetical stack
+static token tok; /* Lookahead token. */
+static size_t depth; /* Current depth of a hypothetical stack
holding deferred productions. This is
used to determine the depth that will be
required of the real stack later on in
@@ -1504,12 +1531,12 @@ addtok_mb (token t, int mbprop)
{
if (MB_CUR_MAX > 1)
{
- REALLOC_IF_NECESSARY(dfa->multibyte_prop, dfa->nmultibyte_prop,
- dfa->tindex + 1);
+ REALLOC_IF_NECESSARY (dfa->multibyte_prop, dfa->nmultibyte_prop,
+ dfa->tindex + 1);
dfa->multibyte_prop[dfa->tindex] = mbprop;
}
- REALLOC_IF_NECESSARY(dfa->tokens, dfa->talloc, dfa->tindex + 1);
+ REALLOC_IF_NECESSARY (dfa->tokens, dfa->talloc, dfa->tindex + 1);
dfa->tokens[dfa->tindex++] = t;
switch (t)
@@ -1563,12 +1590,11 @@ addtok (token t)
/* UTF-8 allows treating a simple, non-inverted MBCSET like a CSET. */
if (work_mbc->invert
- || (!using_utf8() && work_mbc->cset != -1)
+ || (!using_utf8 () && work_mbc->cset != -1)
|| work_mbc->nchars != 0
|| work_mbc->nch_classes != 0
|| work_mbc->nranges != 0
- || work_mbc->nequivs != 0
- || work_mbc->ncoll_elems != 0)
+ || work_mbc->nequivs != 0 || work_mbc->ncoll_elems != 0)
{
addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
if (need_or)
@@ -1615,15 +1641,18 @@ addtok_wc (wint_t wc)
if (cur_mb_len <= 0)
buf[0] = 0;
- addtok_mb(buf[0], cur_mb_len == 1 ? 3 : 1);
+ addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
for (i = 1; i < cur_mb_len; i++)
{
- addtok_mb(buf[i], i == cur_mb_len - 1 ? 2 : 0);
- addtok(CAT);
+ addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0);
+ addtok (CAT);
}
}
#else
-static void addtok_wc (wint_t wc) {}
+static void
+addtok_wc (wint_t wc)
+{
+}
#endif
static void
@@ -1631,11 +1660,11 @@ add_utf8_anychar (void)
{
#if MBS_SUPPORT
static const charclass utf8_classes[5] = {
- { 0, 0, 0, 0, ~0, ~0, 0, 0 }, /* 80-bf: non-lead bytes */
- { ~0, ~0, ~0, ~0, 0, 0, 0, 0 }, /* 00-7f: 1-byte sequence */
- { 0, 0, 0, 0, 0, 0, 0xfffffffcU, 0 }, /* c2-df: 2-byte sequence */
- { 0, 0, 0, 0, 0, 0, 0, 0xffff }, /* e0-ef: 3-byte sequence */
- { 0, 0, 0, 0, 0, 0, 0, 0xff0000 } /* f0-f7: 4-byte sequence */
+ {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-lead bytes */
+ {~0, ~0, ~0, ~0, 0, 0, 0, 0}, /* 00-7f: 1-byte sequence */
+ {0, 0, 0, 0, 0, 0, 0xfffffffcU, 0}, /* c2-df: 2-byte sequence */
+ {0, 0, 0, 0, 0, 0, 0, 0xffff}, /* e0-ef: 3-byte sequence */
+ {0, 0, 0, 0, 0, 0, 0, 0xff0000} /* f0-f7: 4-byte sequence */
};
const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
unsigned int i;
@@ -1653,15 +1682,15 @@ add_utf8_anychar (void)
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', c);
}
- dfa->utf8_anychar_classes[i] = CSET + charclass_index(c);
+ dfa->utf8_anychar_classes[i] = CSET + charclass_index (c);
}
/* A valid UTF-8 character is
- ([0x00-0x7f]
- |[0xc2-0xdf][0x80-0xbf]
- |[0xe0-0xef[0x80-0xbf][0x80-0xbf]
- |[0xf0-f7][0x80-0xbf][0x80-0xbf][0x80-0xbf])
+ ([0x00-0x7f]
+ |[0xc2-0xdf][0x80-0xbf]
+ |[0xe0-0xef[0x80-0xbf][0x80-0xbf]
+ |[0xf0-f7][0x80-0xbf][0x80-0xbf][0x80-0xbf])
which I'll write more concisely "B|CA|DAA|EAAA". Factor the [0x00-0x7f]
and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse
@@ -1721,28 +1750,28 @@ atom (void)
}
else if (MBS_SUPPORT && tok == WCHAR)
{
- addtok_wc (case_fold ? towlower(wctok) : wctok);
+ addtok_wc (case_fold ? towlower (wctok) : wctok);
#ifndef GREP
- if (case_fold && iswalpha(wctok))
+ if (case_fold && iswalpha (wctok))
{
- addtok_wc (towupper(wctok));
+ addtok_wc (towupper (wctok));
addtok (OR);
}
#endif
- tok = lex();
+ tok = lex ();
}
- else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8())
+ else if (MBS_SUPPORT && tok == ANYCHAR && using_utf8 ())
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
pretty sure it would be both profitable and correct to do it for
any encoding; however, the optimization must be done manually as
- it is done above in add_utf8_anychar. So, let's start with
+ it is done above in add_utf8_anychar. So, let's start with
UTF-8: it is the most used, and the structure of the encoding
makes the correctness more obvious. */
- add_utf8_anychar();
- tok = lex();
+ add_utf8_anychar ();
+ tok = lex ();
}
else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
@@ -1751,19 +1780,19 @@ atom (void)
#endif /* MBS_SUPPORT */
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
- addtok(tok);
- tok = lex();
+ addtok (tok);
+ tok = lex ();
}
else if (tok == LPAREN)
{
- tok = lex();
- regexp();
+ tok = lex ();
+ regexp ();
if (tok != RPAREN)
- dfaerror(_("unbalanced ("));
- tok = lex();
+ dfaerror (_("unbalanced ("));
+ tok = lex ();
}
else
- addtok(EMPTY);
+ addtok (EMPTY);
}
/* Return the number of tokens in the given subexpression. */
@@ -1779,11 +1808,11 @@ nsubtoks (size_t tindex)
case QMARK:
case STAR:
case PLUS:
- return 1 + nsubtoks(tindex - 1);
+ return 1 + nsubtoks (tindex - 1);
case CAT:
case OR:
- ntoks1 = nsubtoks(tindex - 1);
- return 1 + ntoks1 + nsubtoks(tindex - 1 - ntoks1);
+ ntoks1 = nsubtoks (tindex - 1);
+ return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1);
}
}
@@ -1795,7 +1824,7 @@ copytoks (size_t tindex, size_t ntokens)
for (i = 0; i < ntokens; ++i)
{
- addtok(dfa->tokens[tindex + i]);
+ addtok (dfa->tokens[tindex + i]);
/* Update index into multibyte csets. */
if (MB_CUR_MAX > 1 && dfa->tokens[tindex + i] == MBCSET)
dfa->multibyte_prop[dfa->tindex - 1] = dfa->multibyte_prop[tindex + i];
@@ -1808,62 +1837,62 @@ closure (void)
int i;
size_t tindex, ntokens;
- atom();
+ atom ();
while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
if (tok == REPMN && (minrep || maxrep))
{
- ntokens = nsubtoks(dfa->tindex);
+ ntokens = nsubtoks (dfa->tindex);
tindex = dfa->tindex - ntokens;
if (maxrep < 0)
- addtok(PLUS);
+ addtok (PLUS);
if (minrep == 0)
- addtok(QMARK);
+ addtok (QMARK);
for (i = 1; i < minrep; ++i)
{
- copytoks(tindex, ntokens);
- addtok(CAT);
+ copytoks (tindex, ntokens);
+ addtok (CAT);
}
for (; i < maxrep; ++i)
{
- copytoks(tindex, ntokens);
- addtok(QMARK);
- addtok(CAT);
+ copytoks (tindex, ntokens);
+ addtok (QMARK);
+ addtok (CAT);
}
- tok = lex();
+ tok = lex ();
}
else if (tok == REPMN)
{
- dfa->tindex -= nsubtoks(dfa->tindex);
- tok = lex();
- closure();
+ dfa->tindex -= nsubtoks (dfa->tindex);
+ tok = lex ();
+ closure ();
}
else
{
- addtok(tok);
- tok = lex();
+ addtok (tok);
+ tok = lex ();
}
}
static void
branch (void)
{
- closure();
+ closure ();
while (tok != RPAREN && tok != OR && tok >= 0)
{
- closure();
- addtok(CAT);
+ closure ();
+ addtok (CAT);
}
}
static void
regexp (void)
{
- branch();
+ branch ();
while (tok == OR)
{
- tok = lex();
- branch();
- addtok(OR);
+ tok = lex ();
+ branch ();
+ addtok (OR);
}
}
@@ -1885,25 +1914,25 @@ dfaparse (char const *s, size_t len, struct dfa *d)
if (MB_CUR_MAX > 1)
{
cur_mb_len = 0;
- memset(&mbs, 0, sizeof mbs);
+ memset (&mbs, 0, sizeof mbs);
}
- if (! syntax_bits_set)
- dfaerror(_("no syntax specified"));
+ if (!syntax_bits_set)
+ dfaerror (_("no syntax specified"));
- tok = lex();
+ tok = lex ();
depth = d->depth;
- regexp();
+ regexp ();
if (tok != END)
- dfaerror(_("unbalanced )"));
+ dfaerror (_("unbalanced )"));
- addtok(END - d->nregexps);
- addtok(CAT);
+ addtok (END - d->nregexps);
+ addtok (CAT);
if (d->nregexps)
- addtok(OR);
+ addtok (OR);
++d->nregexps;
}
@@ -1912,17 +1941,17 @@ dfaparse (char const *s, size_t len, struct dfa *d)
/* Copy one set to another; the destination must be large enough. */
static void
-copy (position_set const *src, position_set *dst)
+copy (position_set const *src, position_set * dst)
{
- REALLOC_IF_NECESSARY(dst->elems, dst->alloc, src->nelem);
- memcpy(dst->elems, src->elems, sizeof(dst->elems[0]) * src->nelem);
+ REALLOC_IF_NECESSARY (dst->elems, dst->alloc, src->nelem);
+ memcpy (dst->elems, src->elems, sizeof (dst->elems[0]) * src->nelem);
dst->nelem = src->nelem;
}
static void
-alloc_position_set (position_set *s, size_t size)
+alloc_position_set (position_set * s, size_t size)
{
- MALLOC(s->elems, size);
+ MALLOC (s->elems, size);
s->alloc = size;
s->nelem = 0;
}
@@ -1932,7 +1961,7 @@ alloc_position_set (position_set *s, size_t size)
then merge (logically-OR) P's constraints into the one in S.
S->elems must point to an array large enough to hold the resulting set. */
static void
-insert (position p, position_set *s)
+insert (position p, position_set * s)
{
size_t count = s->nelem;
size_t lo = 0, hi = count;
@@ -1952,7 +1981,7 @@ insert (position p, position_set *s)
return;
}
- REALLOC_IF_NECESSARY(s->elems, s->alloc, count + 1);
+ REALLOC_IF_NECESSARY (s->elems, s->alloc, count + 1);
for (i = count; i > lo; i--)
s->elems[i] = s->elems[i - 1];
s->elems[lo] = p;
@@ -1962,11 +1991,11 @@ insert (position p, position_set *s)
/* Merge two sets of positions into a third. The result is exactly as if
the positions of both sets were inserted into an initially empty set. */
static void
-merge (position_set const *s1, position_set const *s2, position_set *m)
+merge (position_set const *s1, position_set const *s2, position_set * m)
{
size_t i = 0, j = 0;
- REALLOC_IF_NECESSARY(m->elems, m->alloc, s1->nelem + s2->nelem);
+ REALLOC_IF_NECESSARY (m->elems, m->alloc, s1->nelem + s2->nelem);
m->nelem = 0;
while (i < s1->nelem && j < s2->nelem)
if (s1->elems[i].index > s2->elems[j].index)
@@ -1986,7 +2015,7 @@ merge (position_set const *s1, position_set const *s2, position_set *m)
/* Delete a position from a set. */
static void
-delete (position p, position_set *s)
+delete (position p, position_set * s)
{
size_t i;
@@ -2027,10 +2056,10 @@ state_index (struct dfa *d, position_set const *s, int context)
}
/* We'll have to create a new state. */
- REALLOC_IF_NECESSARY(d->states, d->salloc, d->sindex + 1);
+ REALLOC_IF_NECESSARY (d->states, d->salloc, d->sindex + 1);
d->states[i].hash = hash;
- alloc_position_set(&d->states[i].elems, s->nelem);
- copy(s, &d->states[i].elems);
+ alloc_position_set (&d->states[i].elems, s->nelem);
+ copy (s, &d->states[i].elems);
d->states[i].context = context;
d->states[i].backref = 0;
d->states[i].constraint = 0;
@@ -2046,7 +2075,7 @@ state_index (struct dfa *d, position_set const *s, int context)
constraint = s->elems[j].constraint;
if (SUCCEEDS_IN_CONTEXT (constraint, context, CTX_ANY))
d->states[i].constraint |= constraint;
- if (! d->states[i].first_end)
+ if (!d->states[i].first_end)
d->states[i].first_end = d->tokens[s->elems[j].index];
}
else if (d->tokens[s->elems[j].index] == BACKREF)
@@ -2066,13 +2095,13 @@ state_index (struct dfa *d, position_set const *s, int context)
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
static void
-epsclosure (position_set *s, struct dfa const *d)
+epsclosure (position_set * s, struct dfa const *d)
{
size_t i, j;
- char *visited; /* array of booleans, enough to use char, not int */
+ char *visited; /* array of booleans, enough to use char, not int */
position p, old;
- CALLOC(visited, d->tindex);
+ CALLOC (visited, d->tindex);
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
@@ -2085,7 +2114,7 @@ epsclosure (position_set *s, struct dfa const *d)
{
old = s->elems[i];
p.constraint = old.constraint;
- delete(s->elems[i], s);
+ delete (s->elems[i], s);
if (visited[old.index])
{
--i;
@@ -2118,25 +2147,25 @@ epsclosure (position_set *s, struct dfa const *d)
for (j = 0; j < d->follows[old.index].nelem; ++j)
{
p.index = d->follows[old.index].elems[j].index;
- insert(p, s);
+ insert (p, s);
}
/* Force rescan to start at the beginning. */
i = -1;
}
- free(visited);
+ free (visited);
}
/* Returns the set of contexts for which there is at least one
character included in C. */
static int
-charclass_context(charclass c)
+charclass_context (charclass c)
{
int context = 0;
unsigned int j;
- if (tstbit(eolbyte, c))
+ if (tstbit (eolbyte, c))
context |= CTX_NEWLINE;
for (j = 0; j < CHARCLASS_INTS; ++j)
@@ -2173,7 +2202,6 @@ state_separate_contexts (position_set const *s)
return separate_contexts;
}
-
/* Perform bottom-up analysis on the parse tree, computing various functions.
Note that at this point, we're pretending constructs like \< are real
characters rather than constraints on what can follow them.
@@ -2229,14 +2257,14 @@ state_separate_contexts (position_set const *s)
void
dfaanalyze (struct dfa *d, int searchflag)
{
- int *nullable; /* Nullable stack. */
- size_t *nfirstpos; /* Element count stack for firstpos sets. */
- position *firstpos; /* Array where firstpos elements are stored. */
- size_t *nlastpos; /* Element count stack for lastpos sets. */
- position *lastpos; /* Array where lastpos elements are stored. */
- position_set tmp; /* Temporary set for merging sets. */
- position_set merged; /* Result of merging sets. */
- int separate_contexts; /* Context wanted by some position. */
+ int *nullable; /* Nullable stack. */
+ size_t *nfirstpos; /* Element count stack for firstpos sets. */
+ position *firstpos; /* Array where firstpos elements are stored. */
+ size_t *nlastpos; /* Element count stack for lastpos sets. */
+ position *lastpos; /* Array where lastpos elements are stored. */
+ position_set tmp; /* Temporary set for merging sets. */
+ position_set merged; /* Result of merging sets. */
+ int separate_contexts; /* Context wanted by some position. */
int *o_nullable;
size_t *o_nfirst, *o_nlast;
position *o_firstpos, *o_lastpos;
@@ -2244,152 +2272,152 @@ dfaanalyze (struct dfa *d, int searchflag)
position *pos;
#ifdef DEBUG
- fprintf(stderr, "dfaanalyze:\n");
+ fprintf (stderr, "dfaanalyze:\n");
for (i = 0; i < d->tindex; ++i)
{
- fprintf(stderr, " %zd:", i);
- prtok(d->tokens[i]);
+ fprintf (stderr, " %zd:", i);
+ prtok (d->tokens[i]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
d->searchflag = searchflag;
- MALLOC(nullable, d->depth);
+ MALLOC (nullable, d->depth);
o_nullable = nullable;
- MALLOC(nfirstpos, d->depth);
+ MALLOC (nfirstpos, d->depth);
o_nfirst = nfirstpos;
- MALLOC(firstpos, d->nleaves);
+ MALLOC (firstpos, d->nleaves);
o_firstpos = firstpos, firstpos += d->nleaves;
- MALLOC(nlastpos, d->depth);
+ MALLOC (nlastpos, d->depth);
o_nlast = nlastpos;
- MALLOC(lastpos, d->nleaves);
+ MALLOC (lastpos, d->nleaves);
o_lastpos = lastpos, lastpos += d->nleaves;
- alloc_position_set(&merged, d->nleaves);
+ alloc_position_set (&merged, d->nleaves);
- CALLOC(d->follows, d->tindex);
+ CALLOC (d->follows, d->tindex);
for (i = 0; i < d->tindex; ++i)
{
- switch (d->tokens[i])
- {
- case EMPTY:
- /* The empty set is nullable. */
- *nullable++ = 1;
+ switch (d->tokens[i])
+ {
+ case EMPTY:
+ /* The empty set is nullable. */
+ *nullable++ = 1;
- /* The firstpos and lastpos of the empty leaf are both empty. */
- *nfirstpos++ = *nlastpos++ = 0;
- break;
+ /* The firstpos and lastpos of the empty leaf are both empty. */
+ *nfirstpos++ = *nlastpos++ = 0;
+ break;
- case STAR:
- case PLUS:
- /* Every element in the firstpos of the argument is in the follow
- of every element in the lastpos. */
- tmp.nelem = nfirstpos[-1];
- tmp.elems = firstpos;
- pos = lastpos;
- for (j = 0; j < nlastpos[-1]; ++j)
- {
- merge(&tmp, &d->follows[pos[j].index], &merged);
- copy(&merged, &d->follows[pos[j].index]);
- }
+ case STAR:
+ case PLUS:
+ /* Every element in the firstpos of the argument is in the follow
+ of every element in the lastpos. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos;
+ for (j = 0; j < nlastpos[-1]; ++j)
+ {
+ merge (&tmp, &d->follows[pos[j].index], &merged);
+ copy (&merged, &d->follows[pos[j].index]);
+ }
- case QMARK:
- /* A QMARK or STAR node is automatically nullable. */
- if (d->tokens[i] != PLUS)
- nullable[-1] = 1;
- break;
+ case QMARK:
+ /* A QMARK or STAR node is automatically nullable. */
+ if (d->tokens[i] != PLUS)
+ nullable[-1] = 1;
+ break;
- case CAT:
- /* Every element in the firstpos of the second argument is in the
- follow of every element in the lastpos of the first argument. */
- tmp.nelem = nfirstpos[-1];
- tmp.elems = firstpos;
- pos = lastpos + nlastpos[-1];
- for (j = 0; j < nlastpos[-2]; ++j)
- {
- merge(&tmp, &d->follows[pos[j].index], &merged);
- copy(&merged, &d->follows[pos[j].index]);
- }
+ case CAT:
+ /* Every element in the firstpos of the second argument is in the
+ follow of every element in the lastpos of the first argument. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos + nlastpos[-1];
+ for (j = 0; j < nlastpos[-2]; ++j)
+ {
+ merge (&tmp, &d->follows[pos[j].index], &merged);
+ copy (&merged, &d->follows[pos[j].index]);
+ }
- /* The firstpos of a CAT node is the firstpos of the first argument,
- union that of the second argument if the first is nullable. */
- if (nullable[-2])
- nfirstpos[-2] += nfirstpos[-1];
- else
- firstpos += nfirstpos[-1];
- --nfirstpos;
+ /* The firstpos of a CAT node is the firstpos of the first argument,
+ union that of the second argument if the first is nullable. */
+ if (nullable[-2])
+ nfirstpos[-2] += nfirstpos[-1];
+ else
+ firstpos += nfirstpos[-1];
+ --nfirstpos;
- /* The lastpos of a CAT node is the lastpos of the second argument,
- union that of the first argument if the second is nullable. */
- if (nullable[-1])
- nlastpos[-2] += nlastpos[-1];
- else
- {
- pos = lastpos + nlastpos[-2];
- for (j = nlastpos[-1]; j-- > 0; )
- pos[j] = lastpos[j];
- lastpos += nlastpos[-2];
- nlastpos[-2] = nlastpos[-1];
- }
- --nlastpos;
+ /* The lastpos of a CAT node is the lastpos of the second argument,
+ union that of the first argument if the second is nullable. */
+ if (nullable[-1])
+ nlastpos[-2] += nlastpos[-1];
+ else
+ {
+ pos = lastpos + nlastpos[-2];
+ for (j = nlastpos[-1]; j-- > 0;)
+ pos[j] = lastpos[j];
+ lastpos += nlastpos[-2];
+ nlastpos[-2] = nlastpos[-1];
+ }
+ --nlastpos;
- /* A CAT node is nullable if both arguments are nullable. */
- nullable[-2] = nullable[-1] && nullable[-2];
- --nullable;
- break;
+ /* A CAT node is nullable if both arguments are nullable. */
+ nullable[-2] = nullable[-1] && nullable[-2];
+ --nullable;
+ break;
- case OR:
- /* The firstpos is the union of the firstpos of each argument. */
- nfirstpos[-2] += nfirstpos[-1];
- --nfirstpos;
+ case OR:
+ /* The firstpos is the union of the firstpos of each argument. */
+ nfirstpos[-2] += nfirstpos[-1];
+ --nfirstpos;
- /* The lastpos is the union of the lastpos of each argument. */
- nlastpos[-2] += nlastpos[-1];
- --nlastpos;
+ /* The lastpos is the union of the lastpos of each argument. */
+ nlastpos[-2] += nlastpos[-1];
+ --nlastpos;
- /* An OR node is nullable if either argument is nullable. */
- nullable[-2] = nullable[-1] || nullable[-2];
- --nullable;
- break;
+ /* An OR node is nullable if either argument is nullable. */
+ nullable[-2] = nullable[-1] || nullable[-2];
+ --nullable;
+ break;
- default:
- /* Anything else is a nonempty position. (Note that special
- constructs like \< are treated as nonempty strings here;
- an "epsilon closure" effectively makes them nullable later.
- Backreferences have to get a real position so we can detect
- transitions on them later. But they are nullable. */
- *nullable++ = d->tokens[i] == BACKREF;
-
- /* This position is in its own firstpos and lastpos. */
- *nfirstpos++ = *nlastpos++ = 1;
- --firstpos, --lastpos;
- firstpos->index = lastpos->index = i;
- firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
-
- /* Allocate the follow set for this position. */
- alloc_position_set(&d->follows[i], 1);
- break;
- }
+ default:
+ /* Anything else is a nonempty position. (Note that special
+ constructs like \< are treated as nonempty strings here;
+ an "epsilon closure" effectively makes them nullable later.
+ Backreferences have to get a real position so we can detect
+ transitions on them later. But they are nullable. */
+ *nullable++ = d->tokens[i] == BACKREF;
+
+ /* This position is in its own firstpos and lastpos. */
+ *nfirstpos++ = *nlastpos++ = 1;
+ --firstpos, --lastpos;
+ firstpos->index = lastpos->index = i;
+ firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
+
+ /* Allocate the follow set for this position. */
+ alloc_position_set (&d->follows[i], 1);
+ break;
+ }
#ifdef DEBUG
- /* ... balance the above nonsyntactic #ifdef goo... */
- fprintf(stderr, "node %zd:", i);
- prtok(d->tokens[i]);
- putc('\n', stderr);
- fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
- fprintf(stderr, " firstpos:");
- for (j = nfirstpos[-1]; j-- > 0; )
+ /* ... balance the above nonsyntactic #ifdef goo... */
+ fprintf (stderr, "node %zd:", i);
+ prtok (d->tokens[i]);
+ putc ('\n', stderr);
+ fprintf (stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
+ fprintf (stderr, " firstpos:");
+ for (j = nfirstpos[-1]; j-- > 0;)
{
- fprintf(stderr, " %zd:", firstpos[j].index);
- prtok(d->tokens[firstpos[j].index]);
+ fprintf (stderr, " %zd:", firstpos[j].index);
+ prtok (d->tokens[firstpos[j].index]);
}
- fprintf(stderr, "\n lastpos:");
- for (j = nlastpos[-1]; j-- > 0; )
+ fprintf (stderr, "\n lastpos:");
+ for (j = nlastpos[-1]; j-- > 0;)
{
- fprintf(stderr, " %zd:", lastpos[j].index);
- prtok(d->tokens[lastpos[j].index]);
+ fprintf (stderr, " %zd:", lastpos[j].index);
+ prtok (d->tokens[lastpos[j].index]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
}
@@ -2398,54 +2426,51 @@ dfaanalyze (struct dfa *d, int searchflag)
for (i = 0; i < d->tindex; ++i)
if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
#if MBS_SUPPORT
- || d->tokens[i] == ANYCHAR
- || d->tokens[i] == MBCSET
+ || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET
#endif
|| d->tokens[i] >= CSET)
{
#ifdef DEBUG
- fprintf(stderr, "follows(%zd:", i);
- prtok(d->tokens[i]);
- fprintf(stderr, "):");
- for (j = d->follows[i].nelem; j-- > 0; )
+ fprintf (stderr, "follows(%zd:", i);
+ prtok (d->tokens[i]);
+ fprintf (stderr, "):");
+ for (j = d->follows[i].nelem; j-- > 0;)
{
- fprintf(stderr, " %zd:", d->follows[i].elems[j].index);
- prtok(d->tokens[d->follows[i].elems[j].index]);
+ fprintf (stderr, " %zd:", d->follows[i].elems[j].index);
+ prtok (d->tokens[d->follows[i].elems[j].index]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
- copy(&d->follows[i], &merged);
- epsclosure(&merged, d);
- copy(&merged, &d->follows[i]);
+ copy (&d->follows[i], &merged);
+ epsclosure (&merged, d);
+ copy (&merged, &d->follows[i]);
}
/* Get the epsilon closure of the firstpos of the regexp. The result will
be the set of positions of state 0. */
merged.nelem = 0;
for (i = 0; i < nfirstpos[-1]; ++i)
- insert(firstpos[i], &merged);
- epsclosure(&merged, d);
+ insert (firstpos[i], &merged);
+ epsclosure (&merged, d);
/* Build the initial state. */
d->salloc = 1;
d->sindex = 0;
- MALLOC(d->states, d->salloc);
+ MALLOC (d->states, d->salloc);
separate_contexts = state_separate_contexts (&merged);
- state_index(d, &merged,
- (separate_contexts & CTX_NEWLINE
- ? CTX_NEWLINE
- : separate_contexts ^ CTX_ANY));
-
- free(o_nullable);
- free(o_nfirst);
- free(o_firstpos);
- free(o_nlast);
- free(o_lastpos);
- free(merged.elems);
+ state_index (d, &merged,
+ (separate_contexts & CTX_NEWLINE
+ ? CTX_NEWLINE : separate_contexts ^ CTX_ANY));
+
+ free (o_nullable);
+ free (o_nfirst);
+ free (o_firstpos);
+ free (o_nlast);
+ free (o_lastpos);
+ free (merged.elems);
}
-
/* Find, for each character, the transition out of state s of d, and store
it in the appropriate slot of trans.
@@ -2479,38 +2504,38 @@ dfaanalyze (struct dfa *d, int searchflag)
void
dfastate (state_num s, struct dfa *d, state_num trans[])
{
- leaf_set *grps; /* As many as will ever be needed. */
- charclass *labels; /* Labels corresponding to the groups. */
- size_t ngrps = 0; /* Number of groups actually used. */
- position pos; /* Current position being considered. */
- charclass matches; /* Set of matching characters. */
- int matchesf; /* True if matches is nonempty. */
- charclass intersect; /* Intersection with some label set. */
- int intersectf; /* True if intersect is nonempty. */
- charclass leftovers; /* Stuff in the label that didn't match. */
- int leftoversf; /* True if leftovers is nonempty. */
- position_set follows; /* Union of the follows of some group. */
- position_set tmp; /* Temporary space for merging sets. */
- int possible_contexts; /* Contexts that this group can match. */
- int separate_contexts; /* Context that new state wants to know. */
- state_num state; /* New state. */
- state_num state_newline; /* New state on a newline transition. */
- state_num state_letter; /* New state on a letter transition. */
- int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */
+ leaf_set *grps; /* As many as will ever be needed. */
+ charclass *labels; /* Labels corresponding to the groups. */
+ size_t ngrps = 0; /* Number of groups actually used. */
+ position pos; /* Current position being considered. */
+ charclass matches; /* Set of matching characters. */
+ int matchesf; /* True if matches is nonempty. */
+ charclass intersect; /* Intersection with some label set. */
+ int intersectf; /* True if intersect is nonempty. */
+ charclass leftovers; /* Stuff in the label that didn't match. */
+ int leftoversf; /* True if leftovers is nonempty. */
+ position_set follows; /* Union of the follows of some group. */
+ position_set tmp; /* Temporary space for merging sets. */
+ int possible_contexts; /* Contexts that this group can match. */
+ int separate_contexts; /* Context that new state wants to know. */
+ state_num state; /* New state. */
+ state_num state_newline; /* New state on a newline transition. */
+ state_num state_letter; /* New state on a letter transition. */
+ int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */
size_t i, j, k;
MALLOC (grps, NOTCHAR);
MALLOC (labels, NOTCHAR);
- zeroset(matches);
+ zeroset (matches);
for (i = 0; i < d->states[s].elems.nelem; ++i)
{
pos = d->states[s].elems.elems[i];
if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
- setbit(d->tokens[pos.index], matches);
+ setbit (d->tokens[pos.index], matches);
else if (d->tokens[pos.index] >= CSET)
- copyset(d->charclasses[d->tokens[pos.index] - CSET], matches);
+ copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
else if (MBS_SUPPORT
&& (d->tokens[pos.index] == ANYCHAR
|| d->tokens[pos.index] == MBCSET))
@@ -2520,8 +2545,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
must put it to d->states[s].mbps, which contains the positions
which can match with a single character not a byte. */
if (d->states[s].mbps.nelem == 0)
- alloc_position_set(&d->states[s].mbps, 1);
- insert(pos, &(d->states[s].mbps));
+ alloc_position_set (&d->states[s].mbps, 1);
+ insert (pos, &(d->states[s].mbps));
continue;
}
else
@@ -2531,15 +2556,15 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
they fail in the current context. */
if (pos.constraint != NO_CONSTRAINT)
{
- if (! SUCCEEDS_IN_CONTEXT(pos.constraint,
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NEWLINE))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= ~newline[j];
- if (! SUCCEEDS_IN_CONTEXT(pos.constraint,
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_LETTER))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= ~letters[j];
- if (! SUCCEEDS_IN_CONTEXT(pos.constraint,
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NONE))
for (j = 0; j < CHARCLASS_INTS; ++j)
matches[j] &= letters[j] | newline[j];
@@ -2557,7 +2582,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
group's label doesn't contain that character, go on to the
next group. */
if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
- && !tstbit(d->tokens[pos.index], labels[j]))
+ && !tstbit (d->tokens[pos.index], labels[j]))
continue;
/* Check if this group's label has a nonempty intersection with
@@ -2565,7 +2590,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
intersectf = 0;
for (k = 0; k < CHARCLASS_INTS; ++k)
(intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
- if (! intersectf)
+ if (!intersectf)
continue;
/* It does; now find the set differences both ways. */
@@ -2582,11 +2607,11 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
/* If there were leftovers, create a new group labeled with them. */
if (leftoversf)
{
- copyset(leftovers, labels[ngrps]);
- copyset(intersect, labels[j]);
- MALLOC(grps[ngrps].elems, d->nleaves);
- memcpy(grps[ngrps].elems, grps[j].elems,
- sizeof (grps[j].elems[0]) * grps[j].nelem);
+ copyset (leftovers, labels[ngrps]);
+ copyset (intersect, labels[j]);
+ MALLOC (grps[ngrps].elems, d->nleaves);
+ memcpy (grps[ngrps].elems, grps[j].elems,
+ sizeof (grps[j].elems[0]) * grps[j].nelem);
grps[ngrps].nelem = grps[j].nelem;
++ngrps;
}
@@ -2597,7 +2622,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
/* If every character matching the current position has been
accounted for, we're done. */
- if (! matchesf)
+ if (!matchesf)
break;
}
@@ -2605,17 +2630,17 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
unaccounted for, then we'll have to create a new group. */
if (j == ngrps)
{
- copyset(matches, labels[ngrps]);
- zeroset(matches);
- MALLOC(grps[ngrps].elems, d->nleaves);
+ copyset (matches, labels[ngrps]);
+ zeroset (matches);
+ MALLOC (grps[ngrps].elems, d->nleaves);
grps[ngrps].nelem = 1;
grps[ngrps].elems[0] = pos.index;
++ngrps;
}
}
- alloc_position_set(&follows, d->nleaves);
- alloc_position_set(&tmp, d->nleaves);
+ alloc_position_set (&follows, d->nleaves);
+ alloc_position_set (&tmp, d->nleaves);
/* If we are a searching matcher, the default transition is to a state
containing the positions of state 0, otherwise the default transition
@@ -2623,20 +2648,20 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
if (d->searchflag)
{
/* Find the state(s) corresponding to the positions of state 0. */
- copy(&d->states[0].elems, &follows);
+ copy (&d->states[0].elems, &follows);
separate_contexts = state_separate_contexts (&follows);
state = state_index (d, &follows, separate_contexts ^ CTX_ANY);
if (separate_contexts & CTX_NEWLINE)
- state_newline = state_index(d, &follows, CTX_NEWLINE);
+ state_newline = state_index (d, &follows, CTX_NEWLINE);
else
state_newline = state;
if (separate_contexts & CTX_LETTER)
- state_letter = state_index(d, &follows, CTX_LETTER);
+ state_letter = state_index (d, &follows, CTX_LETTER);
else
state_letter = state;
for (i = 0; i < NOTCHAR; ++i)
- trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state;
+ trans[i] = (IS_WORD_CONSTITUENT (i)) ? state_letter : state;
trans[eolbyte] = state_newline;
}
else
@@ -2651,7 +2676,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
This is a hideously inefficient loop. Fix it someday. */
for (j = 0; j < grps[i].nelem; ++j)
for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
- insert(d->follows[grps[i].elems[j]].elems[k], &follows);
+ insert (d->follows[grps[i].elems[j]].elems[k], &follows);
if (d->mb_cur_max > 1)
{
@@ -2687,13 +2712,12 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
if (d->searchflag
- && (! MBS_SUPPORT
- || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
+ && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
for (j = 0; j < d->states[0].elems.nelem; ++j)
- insert(d->states[0].elems.elems[j], &follows);
+ insert (d->states[0].elems.elems[j], &follows);
/* Find out if the new state will want any context information. */
- possible_contexts = charclass_context(labels[i]);
+ possible_contexts = charclass_context (labels[i]);
separate_contexts = state_separate_contexts (&follows);
/* Find the state(s) corresponding to the union of the follows. */
@@ -2706,7 +2730,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
else
state_newline = state;
if (separate_contexts & possible_contexts & CTX_LETTER)
- state_letter = state_index(d, &follows, CTX_LETTER);
+ state_letter = state_index (d, &follows, CTX_LETTER);
else
state_letter = state;
@@ -2719,7 +2743,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
if (c == eolbyte)
trans[c] = state_newline;
- else if (IS_WORD_CONSTITUENT(c))
+ else if (IS_WORD_CONSTITUENT (c))
trans[c] = state_letter;
else if (c < NOTCHAR)
trans[c] = state;
@@ -2727,11 +2751,11 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
}
for (i = 0; i < ngrps; ++i)
- free(grps[i].elems);
- free(follows.elems);
- free(tmp.elems);
- free(grps);
- free(labels);
+ free (grps[i].elems);
+ free (follows.elems);
+ free (tmp.elems);
+ free (grps);
+ free (labels);
}
/* Some routines for manipulating a compiled dfa's transition tables.
@@ -2744,7 +2768,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
static void
build_state (state_num s, struct dfa *d)
{
- state_num *trans; /* The new transition table. */
+ state_num *trans; /* The new transition table. */
state_num i;
/* Set an upper limit on the number of transition tables that will ever
@@ -2755,8 +2779,8 @@ build_state (state_num s, struct dfa *d)
{
for (i = 0; i < d->tralloc; ++i)
{
- free(d->trans[i]);
- free(d->fails[i]);
+ free (d->trans[i]);
+ free (d->fails[i]);
d->trans[i] = d->fails[i] = NULL;
}
d->trcount = 0;
@@ -2766,15 +2790,15 @@ build_state (state_num s, struct dfa *d)
/* Set up the success bits for this state. */
d->success[s] = 0;
- if (ACCEPTS_IN_CONTEXT(d->states[s].context, CTX_NEWLINE, s, *d))
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NEWLINE, s, *d))
d->success[s] |= CTX_NEWLINE;
- if (ACCEPTS_IN_CONTEXT(d->states[s].context, CTX_LETTER, s, *d))
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_LETTER, s, *d))
d->success[s] |= CTX_LETTER;
- if (ACCEPTS_IN_CONTEXT(d->states[s].context, CTX_NONE, s, *d))
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d))
d->success[s] |= CTX_NONE;
- MALLOC(trans, NOTCHAR);
- dfastate(s, d, trans);
+ MALLOC (trans, NOTCHAR);
+ dfastate (s, d, trans);
/* Now go through the new transition table, and make sure that the trans
and fail arrays are allocated large enough to hold a pointer for the
@@ -2786,11 +2810,11 @@ build_state (state_num s, struct dfa *d)
while (trans[i] >= d->tralloc)
d->tralloc *= 2;
- REALLOC(d->realtrans, d->tralloc + 1);
+ REALLOC (d->realtrans, d->tralloc + 1);
d->trans = d->realtrans + 1;
- REALLOC(d->fails, d->tralloc);
- REALLOC(d->success, d->tralloc);
- REALLOC(d->newlines, d->tralloc);
+ REALLOC (d->fails, d->tralloc);
+ REALLOC (d->success, d->tralloc);
+ REALLOC (d->newlines, d->tralloc);
while (oldalloc < d->tralloc)
{
d->trans[oldalloc] = NULL;
@@ -2803,7 +2827,7 @@ build_state (state_num s, struct dfa *d)
d->newlines[s] = trans[eolbyte];
trans[eolbyte] = -1;
- if (ACCEPTING(s, *d))
+ if (ACCEPTING (s, *d))
d->fails[s] = trans;
else
d->trans[s] = trans;
@@ -2814,12 +2838,12 @@ build_state_zero (struct dfa *d)
{
d->tralloc = 1;
d->trcount = 0;
- CALLOC(d->realtrans, d->tralloc + 1);
+ CALLOC (d->realtrans, d->tralloc + 1);
d->trans = d->realtrans + 1;
- CALLOC(d->fails, d->tralloc);
- MALLOC(d->success, d->tralloc);
- MALLOC(d->newlines, d->tralloc);
- build_state(0, d);
+ CALLOC (d->fails, d->tralloc);
+ MALLOC (d->success, d->tralloc);
+ MALLOC (d->newlines, d->tralloc);
+ build_state (0, d);
}
/* Multibyte character handling sub-routines for dfaexec. */
@@ -2848,7 +2872,7 @@ build_state_zero (struct dfa *d)
}
static void
-realloc_trans_if_necessary(struct dfa *d, state_num new_state)
+realloc_trans_if_necessary (struct dfa *d, state_num new_state)
{
/* Make sure that the trans and fail arrays are allocated large enough
to hold a pointer for the new state. */
@@ -2858,11 +2882,11 @@ realloc_trans_if_necessary(struct dfa *d, state_num new_state)
while (new_state >= d->tralloc)
d->tralloc *= 2;
- REALLOC(d->realtrans, d->tralloc + 1);
+ REALLOC (d->realtrans, d->tralloc + 1);
d->trans = d->realtrans + 1;
- REALLOC(d->fails, d->tralloc);
- REALLOC(d->success, d->tralloc);
- REALLOC(d->newlines, d->tralloc);
+ REALLOC (d->fails, d->tralloc);
+ REALLOC (d->success, d->tralloc);
+ REALLOC (d->newlines, d->tralloc);
while (oldalloc < d->tralloc)
{
d->trans[oldalloc] = NULL;
@@ -2875,9 +2899,9 @@ realloc_trans_if_necessary(struct dfa *d, state_num new_state)
transit_state_consume_1char. */
typedef enum
{
- TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
- TRANSIT_STATE_DONE, /* State transition has finished. */
- TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */
+ TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
+ TRANSIT_STATE_DONE, /* State transition has finished. */
+ TRANSIT_STATE_END_BUFFER /* Reach the end of the buffer. */
} status_transit_state;
/* Consume a single byte and transit state from 's' to '*next_state'.
@@ -2886,7 +2910,7 @@ typedef enum
reach the end of the buffer. */
static status_transit_state
transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
- state_num *next_state)
+ state_num * next_state)
{
state_num *t;
state_num works = s;
@@ -2918,7 +2942,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
}
else
{
- build_state(works, d);
+ build_state (works, d);
}
}
*next_state = works;
@@ -2936,22 +2960,22 @@ match_anychar (struct dfa *d, state_num s, position pos, size_t idx)
int mbclen;
wc = inputwcs[idx];
- mbclen = (mblen_buf[idx] == 0)? 1 : mblen_buf[idx];
+ mbclen = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
/* Check syntax bits. */
- if (wc == (wchar_t)eolbyte)
+ if (wc == (wchar_t) eolbyte)
{
if (!(syntax_bits & RE_DOT_NEWLINE))
return 0;
}
- else if (wc == (wchar_t)'\0')
+ else if (wc == (wchar_t) '\0')
{
if (syntax_bits & RE_DOT_NOT_NULL)
return 0;
}
- context = wchar_context(wc);
- if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].context, context))
+ context = wchar_context (wc);
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
return 0;
return mbclen;
@@ -2965,10 +2989,10 @@ static int
match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
{
size_t i;
- int match; /* Flag which represent that matching succeed. */
- int match_len; /* Length of the character (or collating element)
- with which this operator match. */
- int op_len; /* Length of the operator. */
+ int match; /* Flag which represent that matching succeed. */
+ int match_len; /* Length of the character (or collating element)
+ with which this operator match. */
+ int op_len; /* Length of the operator. */
char buffer[128];
wchar_t wcbuf[6];
@@ -2976,53 +3000,53 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
struct mb_char_classes *work_mbc;
int context;
- wchar_t wc; /* Current refering character. */
+ wchar_t wc; /* Current refering character. */
wc = inputwcs[idx];
/* Check syntax bits. */
- if (wc == (wchar_t)eolbyte)
+ if (wc == (wchar_t) eolbyte)
{
if (!(syntax_bits & RE_DOT_NEWLINE))
return 0;
}
- else if (wc == (wchar_t)'\0')
+ else if (wc == (wchar_t) '\0')
{
if (syntax_bits & RE_DOT_NOT_NULL)
return 0;
}
- context = wchar_context(wc);
- if (!SUCCEEDS_IN_CONTEXT(pos.constraint, d->states[s].context, context))
+ context = wchar_context (wc);
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
return 0;
/* Assign the current refering operator to work_mbc. */
work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
match = !work_mbc->invert;
- match_len = (mblen_buf[idx] == 0)? 1 : mblen_buf[idx];
+ match_len = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
/* Match in range 0-255? */
if (wc < NOTCHAR && work_mbc->cset != -1
- && tstbit((unsigned char)wc, d->charclasses[work_mbc->cset]))
+ && tstbit ((unsigned char) wc, d->charclasses[work_mbc->cset]))
goto charset_matched;
/* match with a character class? */
- for (i = 0; i<work_mbc->nch_classes; i++)
+ for (i = 0; i < work_mbc->nch_classes; i++)
{
- if (iswctype((wint_t)wc, work_mbc->ch_classes[i]))
+ if (iswctype ((wint_t) wc, work_mbc->ch_classes[i]))
goto charset_matched;
}
- strncpy(buffer, (char const *) buf_begin + idx, match_len);
+ strncpy (buffer, (char const *) buf_begin + idx, match_len);
buffer[match_len] = '\0';
/* match with an equivalent class? */
- for (i = 0; i<work_mbc->nequivs; i++)
+ for (i = 0; i < work_mbc->nequivs; i++)
{
- op_len = strlen(work_mbc->equivs[i]);
- strncpy(buffer, (char const *) buf_begin + idx, op_len);
+ op_len = strlen (work_mbc->equivs[i]);
+ strncpy (buffer, (char const *) buf_begin + idx, op_len);
buffer[op_len] = '\0';
- if (strcoll(work_mbc->equivs[i], buffer) == 0)
+ if (strcoll (work_mbc->equivs[i], buffer) == 0)
{
match_len = op_len;
goto charset_matched;
@@ -3030,13 +3054,13 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
}
/* match with a collating element? */
- for (i = 0; i<work_mbc->ncoll_elems; i++)
+ for (i = 0; i < work_mbc->ncoll_elems; i++)
{
- op_len = strlen(work_mbc->coll_elems[i]);
- strncpy(buffer, (char const *) buf_begin + idx, op_len);
+ op_len = strlen (work_mbc->coll_elems[i]);
+ strncpy (buffer, (char const *) buf_begin + idx, op_len);
buffer[op_len] = '\0';
- if (strcoll(work_mbc->coll_elems[i], buffer) == 0)
+ if (strcoll (work_mbc->coll_elems[i], buffer) == 0)
{
match_len = op_len;
goto charset_matched;
@@ -3047,18 +3071,17 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
wcbuf[1] = wcbuf[3] = wcbuf[5] = '\0';
/* match with a range? */
- for (i = 0; i<work_mbc->nranges; i++)
+ for (i = 0; i < work_mbc->nranges; i++)
{
wcbuf[2] = work_mbc->range_sts[i];
wcbuf[4] = work_mbc->range_ends[i];
- if (wcscoll(wcbuf, wcbuf+2) >= 0 &&
- wcscoll(wcbuf+4, wcbuf) >= 0)
+ if (wcscoll (wcbuf, wcbuf + 2) >= 0 && wcscoll (wcbuf + 4, wcbuf) >= 0)
goto charset_matched;
}
/* match with a character? */
- for (i = 0; i<work_mbc->nchars; i++)
+ for (i = 0; i < work_mbc->nchars; i++)
{
if (wc == work_mbc->chars[i])
goto charset_matched;
@@ -3066,7 +3089,7 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
match = !match;
- charset_matched:
+charset_matched:
return match ? match_len : 0;
}
@@ -3077,26 +3100,26 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
`idx' is the index from the buf_begin, and it is the current position
in the buffer.
Caller MUST free the array which this function return. */
-static int*
+static int *
check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx)
{
size_t i;
- int* rarray;
+ int *rarray;
- MALLOC(rarray, d->states[s].mbps.nelem);
+ MALLOC (rarray, d->states[s].mbps.nelem);
for (i = 0; i < d->states[s].mbps.nelem; ++i)
{
position pos = d->states[s].mbps.elems[i];
- switch(d->tokens[pos.index])
+ switch (d->tokens[pos.index])
{
case ANYCHAR:
- rarray[i] = match_anychar(d, s, pos, idx);
+ rarray[i] = match_anychar (d, s, pos, idx);
break;
case MBCSET:
- rarray[i] = match_mb_charset(d, s, pos, idx);
+ rarray[i] = match_mb_charset (d, s, pos, idx);
break;
default:
- break; /* cannot happen. */
+ break; /* cannot happen. */
}
}
return rarray;
@@ -3111,18 +3134,17 @@ check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx)
static status_transit_state
transit_state_consume_1char (struct dfa *d, state_num s,
unsigned char const **pp,
- int *match_lens, int *mbclen, position_set *pps)
+ int *match_lens, int *mbclen, position_set * pps)
{
size_t i, j;
int k;
state_num s1, s2;
- int* work_mbls;
+ int *work_mbls;
status_transit_state rs = TRANSIT_STATE_DONE;
/* Calculate the length of the (single/multi byte) character
to which p points. */
- *mbclen = (mblen_buf[*pp - buf_begin] == 0)? 1
- : mblen_buf[*pp - buf_begin];
+ *mbclen = (mblen_buf[*pp - buf_begin] == 0) ? 1 : mblen_buf[*pp - buf_begin];
/* Calculate the state which can be reached from the state `s' by
consuming `*mbclen' single bytes from the buffer. */
@@ -3130,30 +3152,29 @@ transit_state_consume_1char (struct dfa *d, state_num s,
for (k = 0; k < *mbclen; k++)
{
s2 = s1;
- rs = transit_state_singlebyte(d, s2, (*pp)++, &s1);
+ rs = transit_state_singlebyte (d, s2, (*pp)++, &s1);
}
/* Copy the positions contained by `s1' to the set `pps'. */
- copy(&(d->states[s1].elems), pps);
+ copy (&(d->states[s1].elems), pps);
/* Check (inputed)match_lens, and initialize if it is NULL. */
if (match_lens == NULL && d->states[s].mbps.nelem != 0)
- work_mbls = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
+ work_mbls = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
else
work_mbls = match_lens;
/* Add all of the positions which can be reached from `s' by consuming
a single character. */
- for (i = 0; i < d->states[s].mbps.nelem ; i++)
- {
+ for (i = 0; i < d->states[s].mbps.nelem; i++)
+ {
if (work_mbls[i] == *mbclen)
for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
j++)
- insert(d->follows[d->states[s].mbps.elems[i].index].elems[j],
- pps);
+ insert (d->follows[d->states[s].mbps.elems[i].index].elems[j], pps);
}
if (match_lens == NULL && work_mbls != NULL)
- free(work_mbls);
+ free (work_mbls);
return rs;
}
@@ -3164,11 +3185,11 @@ static state_num
transit_state (struct dfa *d, state_num s, unsigned char const **pp)
{
state_num s1;
- int mbclen; /* The length of current input multibyte character. */
+ int mbclen; /* The length of current input multibyte character. */
int maxlen = 0;
size_t i, j;
int *match_lens = NULL;
- size_t nelem = d->states[s].mbps.nelem; /* Just a alias. */
+ size_t nelem = d->states[s].mbps.nelem; /* Just a alias. */
position_set follows;
unsigned char const *p1 = *pp;
wchar_t wc;
@@ -3178,7 +3199,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
We check whether each of them can match or not. */
{
/* Note: caller must free the return value of this function. */
- match_lens = check_matching_with_multibyte_ops(d, s, *pp - buf_begin);
+ match_lens = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
for (i = 0; i < nelem; i++)
/* Search the operator which match the longest string,
@@ -3194,52 +3215,51 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
We need to check only one single byte character. */
{
status_transit_state rs;
- rs = transit_state_singlebyte(d, s, *pp, &s1);
+ rs = transit_state_singlebyte (d, s, *pp, &s1);
/* We must update the pointer if state transition succeeded. */
if (rs == TRANSIT_STATE_DONE)
- ++*pp;
+ ++ * pp;
- free(match_lens);
+ free (match_lens);
return s1;
}
/* This state has some operators which can match a multibyte character. */
- alloc_position_set(&follows, d->nleaves);
+ alloc_position_set (&follows, d->nleaves);
/* `maxlen' may be longer than the length of a character, because it may
not be a character but a (multi character) collating element.
We enumerate all of the positions which `s' can reach by consuming
`maxlen' bytes. */
- transit_state_consume_1char(d, s, pp, match_lens, &mbclen, &follows);
+ transit_state_consume_1char (d, s, pp, match_lens, &mbclen, &follows);
wc = inputwcs[*pp - mbclen - buf_begin];
- s1 = state_index(d, &follows, wchar_context (wc));
- realloc_trans_if_necessary(d, s1);
+ s1 = state_index (d, &follows, wchar_context (wc));
+ realloc_trans_if_necessary (d, s1);
while (*pp - p1 < maxlen)
{
- transit_state_consume_1char(d, s1, pp, NULL, &mbclen, &follows);
+ transit_state_consume_1char (d, s1, pp, NULL, &mbclen, &follows);
- for (i = 0; i < nelem ; i++)
+ for (i = 0; i < nelem; i++)
{
if (match_lens[i] == *pp - p1)
for (j = 0;
j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
- insert(d->follows[d->states[s1].mbps.elems[i].index].elems[j],
- &follows);
+ insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j],
+ &follows);
}
wc = inputwcs[*pp - mbclen - buf_begin];
- s1 = state_index(d, &follows, wchar_context (wc));
- realloc_trans_if_necessary(d, s1);
+ s1 = state_index (d, &follows, wchar_context (wc));
+ realloc_trans_if_necessary (d, s1);
}
- free(match_lens);
- free(follows.elems);
+ free (match_lens);
+ free (follows.elems);
return s1;
}
-
/* Initialize mblen_buf and inputwcs with data from the next line. */
static void
@@ -3257,14 +3277,14 @@ prepare_wc_buf (const char *begin, const char *end)
if (remain_bytes == 0)
{
remain_bytes
- = mbrtowc(inputwcs + i, begin + i, end - begin - i + 1, &mbs);
+ = mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs);
if (remain_bytes < 1
|| remain_bytes == (size_t) -1
|| remain_bytes == (size_t) -2
- || (remain_bytes == 1 && inputwcs[i] == (wchar_t)begin[i]))
+ || (remain_bytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
{
remain_bytes = 0;
- inputwcs[i] = (wchar_t)begin[i];
+ inputwcs[i] = (wchar_t) begin[i];
mblen_buf[i] = 0;
if (begin[i] == eol)
break;
@@ -3285,7 +3305,7 @@ prepare_wc_buf (const char *begin, const char *end)
buf_end = (unsigned char *) (begin + i);
mblen_buf[i] = 0;
- inputwcs[i] = 0; /* sentinel */
+ inputwcs[i] = 0; /* sentinel */
#endif /* MBS_SUPPORT */
}
@@ -3305,15 +3325,15 @@ char *
dfaexec (struct dfa *d, char const *begin, char *end,
int allow_nl, size_t *count, int *backref)
{
- state_num s, s1; /* Current state. */
- unsigned char const *p; /* Current input character. */
- state_num **trans, *t; /* Copy of d->trans so it can be optimized
+ state_num s, s1; /* Current state. */
+ unsigned char const *p; /* Current input character. */
+ state_num **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
- unsigned char eol = eolbyte; /* Likewise for eolbyte. */
+ unsigned char eol = eolbyte; /* Likewise for eolbyte. */
unsigned char saved_end;
- if (! d->tralloc)
- build_state_zero(d);
+ if (!d->tralloc)
+ build_state_zero (d);
s = s1 = 0;
p = (unsigned char const *) begin;
@@ -3323,9 +3343,9 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (d->mb_cur_max > 1)
{
- MALLOC(mblen_buf, end - begin + 2);
- MALLOC(inputwcs, end - begin + 2);
- memset(&mbs, 0, sizeof(mbstate_t));
+ MALLOC (mblen_buf, end - begin + 2);
+ MALLOC (inputwcs, end - begin + 2);
+ memset (&mbs, 0, sizeof (mbstate_t));
prepare_wc_buf ((const char *) p, end);
}
@@ -3337,7 +3357,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (p > buf_end)
break;
s1 = s;
- SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
+ SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p);
if (d->states[s].mbps.nelem == 0)
{
@@ -3352,15 +3372,15 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (backref)
{
*backref = 1;
- free(mblen_buf);
- free(inputwcs);
+ free (mblen_buf);
+ free (inputwcs);
*end = saved_end;
return (char *) p;
}
/* Can match with a multibyte character (and multi character
collating element). Transition table might be updated. */
- s = transit_state(d, s, &p);
+ s = transit_state (d, s, &p);
trans = d->trans;
}
else
@@ -3370,7 +3390,9 @@ dfaexec (struct dfa *d, char const *begin, char *end,
s1 = t[*p++];
if ((t = trans[s1]) == NULL)
{
- state_num tmp = s; s = s1; s1 = tmp; /* swap */
+ state_num tmp = s;
+ s = s1;
+ s1 = tmp; /* swap */
break;
}
s = t[*p++];
@@ -3385,8 +3407,8 @@ dfaexec (struct dfa *d, char const *begin, char *end,
*backref = (d->states[s].backref != 0);
if (d->mb_cur_max > 1)
{
- free(mblen_buf);
- free(inputwcs);
+ free (mblen_buf);
+ free (inputwcs);
}
*end = saved_end;
return (char *) p;
@@ -3397,7 +3419,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
{
/* Can match with a multibyte character (and multicharacter
collating element). Transition table might be updated. */
- s = transit_state(d, s, &p);
+ s = transit_state (d, s, &p);
trans = d->trans;
}
else
@@ -3409,7 +3431,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if ((char *) p <= end && p[-1] == eol)
{
if (count)
- ++*count;
+ ++ * count;
if (d->mb_cur_max > 1)
prepare_wc_buf ((const char *) p, end);
@@ -3420,8 +3442,8 @@ dfaexec (struct dfa *d, char const *begin, char *end,
{
if (d->mb_cur_max > 1)
{
- free(mblen_buf);
- free(inputwcs);
+ free (mblen_buf);
+ free (inputwcs);
}
*end = saved_end;
return NULL;
@@ -3429,7 +3451,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
if (s >= 0)
{
- build_state(s, d);
+ build_state (s, d);
trans = d->trans;
continue;
}
@@ -3449,28 +3471,28 @@ free_mbdata (struct dfa *d)
{
size_t i;
- free(d->multibyte_prop);
+ free (d->multibyte_prop);
d->multibyte_prop = NULL;
for (i = 0; i < d->nmbcsets; ++i)
{
size_t j;
struct mb_char_classes *p = &(d->mbcsets[i]);
- free(p->chars);
- free(p->ch_classes);
- free(p->range_sts);
- free(p->range_ends);
+ free (p->chars);
+ free (p->ch_classes);
+ free (p->range_sts);
+ free (p->range_ends);
for (j = 0; j < p->nequivs; ++j)
- free(p->equivs[j]);
- free(p->equivs);
+ free (p->equivs[j]);
+ free (p->equivs);
for (j = 0; j < p->ncoll_elems; ++j)
- free(p->coll_elems[j]);
- free(p->coll_elems);
+ free (p->coll_elems[j]);
+ free (p->coll_elems);
}
- free(d->mbcsets);
+ free (d->mbcsets);
d->mbcsets = NULL;
d->nmbcsets = 0;
}
@@ -3483,19 +3505,19 @@ dfainit (struct dfa *d)
memset (d, 0, sizeof *d);
d->calloc = 1;
- MALLOC(d->charclasses, d->calloc);
+ MALLOC (d->charclasses, d->calloc);
d->talloc = 1;
- MALLOC(d->tokens, d->talloc);
+ MALLOC (d->tokens, d->talloc);
d->mb_cur_max = MB_CUR_MAX;
if (d->mb_cur_max > 1)
{
d->nmultibyte_prop = 1;
- MALLOC(d->multibyte_prop, d->nmultibyte_prop);
+ MALLOC (d->multibyte_prop, d->nmultibyte_prop);
d->mbcsets_alloc = 1;
- MALLOC(d->mbcsets, d->mbcsets_alloc);
+ MALLOC (d->mbcsets, d->mbcsets_alloc);
}
}
@@ -3504,12 +3526,12 @@ dfaoptimize (struct dfa *d)
{
size_t i;
- if (!MBS_SUPPORT || !using_utf8())
+ if (!MBS_SUPPORT || !using_utf8 ())
return;
for (i = 0; i < d->tindex; ++i)
{
- switch(d->tokens[i])
+ switch (d->tokens[i])
{
case ANYCHAR:
/* Lowered. */
@@ -3530,11 +3552,11 @@ dfaoptimize (struct dfa *d)
void
dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
{
- dfainit(d);
- dfaparse(s, len, d);
- dfamust(d);
- dfaoptimize(d);
- dfaanalyze(d, searchflag);
+ dfainit (d);
+ dfaparse (s, len, d);
+ dfamust (d);
+ dfaoptimize (d);
+ dfaanalyze (d, searchflag);
}
/* Free the storage held by the components of a dfa. */
@@ -3544,35 +3566,36 @@ dfafree (struct dfa *d)
size_t i;
struct dfamust *dm, *ndm;
- free(d->charclasses);
- free(d->tokens);
+ free (d->charclasses);
+ free (d->tokens);
if (d->mb_cur_max > 1)
- free_mbdata(d);
-
- for (i = 0; i < d->sindex; ++i) {
- free(d->states[i].elems.elems);
- if (MBS_SUPPORT)
- free(d->states[i].mbps.elems);
- }
- free(d->states);
+ free_mbdata (d);
+
+ for (i = 0; i < d->sindex; ++i)
+ {
+ free (d->states[i].elems.elems);
+ if (MBS_SUPPORT)
+ free (d->states[i].mbps.elems);
+ }
+ free (d->states);
for (i = 0; i < d->tindex; ++i)
- free(d->follows[i].elems);
- free(d->follows);
+ free (d->follows[i].elems);
+ free (d->follows);
for (i = 0; i < d->tralloc; ++i)
{
- free(d->trans[i]);
- free(d->fails[i]);
+ free (d->trans[i]);
+ free (d->fails[i]);
}
- free(d->realtrans);
- free(d->fails);
- free(d->newlines);
- free(d->success);
+ free (d->realtrans);
+ free (d->fails);
+ free (d->newlines);
+ free (d->success);
for (dm = d->musts; dm; dm = ndm)
{
ndm = dm->next;
- free(dm->must);
- free(dm);
+ free (dm->must);
+ free (dm);
}
}
@@ -3685,9 +3708,9 @@ istrstr (char const *lookin, char const *lookfor)
char const *cp;
size_t len;
- len = strlen(lookfor);
+ len = strlen (lookfor);
for (cp = lookin; *cp != '\0'; ++cp)
- if (strncmp(cp, lookfor, len) == 0)
+ if (strncmp (cp, lookfor, len) == 0)
return (char *) cp;
return NULL;
}
@@ -3701,7 +3724,7 @@ freelist (char **cpp)
return;
for (i = 0; cpp[i] != NULL; ++i)
{
- free(cpp[i]);
+ free (cpp[i]);
cpp[i] = NULL;
}
}
@@ -3713,34 +3736,34 @@ enlist (char **cpp, char *new, size_t len)
if (cpp == NULL)
return NULL;
- if ((new = icpyalloc(new)) == NULL)
+ if ((new = icpyalloc (new)) == NULL)
{
- freelist(cpp);
+ freelist (cpp);
return NULL;
}
new[len] = '\0';
/* Is there already something in the list that's new (or longer)? */
for (i = 0; cpp[i] != NULL; ++i)
- if (istrstr(cpp[i], new) != NULL)
+ if (istrstr (cpp[i], new) != NULL)
{
- free(new);
+ free (new);
return cpp;
}
/* Eliminate any obsoleted strings. */
j = 0;
while (cpp[j] != NULL)
- if (istrstr(new, cpp[j]) == NULL)
+ if (istrstr (new, cpp[j]) == NULL)
++j;
else
{
- free(cpp[j]);
+ free (cpp[j]);
if (--i == j)
break;
cpp[j] = cpp[i];
cpp[i] = NULL;
}
/* Add the new string. */
- REALLOC(cpp, i + 2);
+ REALLOC (cpp, i + 2);
cpp[i] = new;
cpp[i + 1] = NULL;
return cpp;
@@ -3759,7 +3782,7 @@ comsubs (char *left, char const *right)
if (left == NULL || right == NULL)
return NULL;
- cpp = malloc(sizeof *cpp);
+ cpp = malloc (sizeof *cpp);
if (cpp == NULL)
return NULL;
cpp[0] = NULL;
@@ -3800,7 +3823,7 @@ addlists (char **old, char **new)
return NULL;
for (i = 0; new[i] != NULL; ++i)
{
- old = enlist(old, new[i], strlen(new[i]));
+ old = enlist (old, new[i], strlen (new[i]));
if (old == NULL)
break;
}
@@ -3818,7 +3841,7 @@ inboth (char **left, char **right)
if (left == NULL || right == NULL)
return NULL;
- both = malloc(sizeof *both);
+ both = malloc (sizeof *both);
if (both == NULL)
return NULL;
both[0] = NULL;
@@ -3826,15 +3849,15 @@ inboth (char **left, char **right)
{
for (rnum = 0; right[rnum] != NULL; ++rnum)
{
- temp = comsubs(left[lnum], right[rnum]);
+ temp = comsubs (left[lnum], right[rnum]);
if (temp == NULL)
{
- freelist(both);
+ freelist (both);
return NULL;
}
- both = addlists(both, temp);
- freelist(temp);
- free(temp);
+ both = addlists (both, temp);
+ freelist (temp);
+ free (temp);
if (both == NULL)
return NULL;
}
@@ -3851,10 +3874,10 @@ typedef struct
} must;
static void
-resetmust (must *mp)
+resetmust (must * mp)
{
mp->left[0] = mp->right[0] = mp->is[0] = '\0';
- freelist(mp->in);
+ freelist (mp->in);
}
static void
@@ -3879,21 +3902,21 @@ dfamust (struct dfa *d)
mp[i] = must0;
for (i = 0; i <= d->tindex; ++i)
{
- mp[i].in = xmalloc(sizeof *mp[i].in);
- mp[i].left = xmalloc(2);
- mp[i].right = xmalloc(2);
- mp[i].is = xmalloc(2);
+ mp[i].in = xmalloc (sizeof *mp[i].in);
+ mp[i].left = xmalloc (2);
+ mp[i].right = xmalloc (2);
+ mp[i].is = xmalloc (2);
mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
mp[i].in[0] = NULL;
}
#ifdef DEBUG
- fprintf(stderr, "dfamust:\n");
+ fprintf (stderr, "dfamust:\n");
for (i = 0; i < d->tindex; ++i)
{
- fprintf(stderr, " %zd:", i);
- prtok(d->tokens[i]);
+ fprintf (stderr, " %zd:", i);
+ prtok (d->tokens[i]);
}
- putc('\n', stderr);
+ putc ('\n', stderr);
#endif
for (ri = 0; ri < d->tindex; ++ri)
{
@@ -3910,13 +3933,13 @@ dfamust (struct dfa *d)
case LIMWORD:
case NOTLIMWORD:
case BACKREF:
- resetmust(mp);
+ resetmust (mp);
break;
case STAR:
case QMARK:
assert (musts < mp);
--mp;
- resetmust(mp);
+ resetmust (mp);
break;
case OR:
assert (&musts[2] <= mp);
@@ -3937,8 +3960,8 @@ dfamust (struct dfa *d)
++i;
lmp->left[i] = '\0';
/* Right side */
- ln = strlen(lmp->right);
- rn = strlen(rmp->right);
+ ln = strlen (lmp->right);
+ rn = strlen (rmp->right);
n = ln;
if (n > rn)
n = rn;
@@ -3948,11 +3971,11 @@ dfamust (struct dfa *d)
for (j = 0; j < i; ++j)
lmp->right[j] = lmp->right[(ln - i) + j];
lmp->right[j] = '\0';
- new = inboth(lmp->in, rmp->in);
+ new = inboth (lmp->in, rmp->in);
if (new == NULL)
goto done;
- freelist(lmp->in);
- free(lmp->in);
+ freelist (lmp->in);
+ free (lmp->in);
lmp->in = new;
}
break;
@@ -3964,7 +3987,7 @@ dfamust (struct dfa *d)
case END:
assert (mp == &musts[1]);
for (i = 0; musts[0].in[i] != NULL; ++i)
- if (strlen(musts[0].in[i]) > strlen(result))
+ if (strlen (musts[0].in[i]) > strlen (result))
result = musts[0].in[i];
if (STREQ (result, musts[0].is))
exact = 1;
@@ -3980,39 +4003,37 @@ dfamust (struct dfa *d)
/* In. Everything in left, plus everything in
right, plus catenation of
left's right and right's left. */
- lmp->in = addlists(lmp->in, rmp->in);
+ lmp->in = addlists (lmp->in, rmp->in);
if (lmp->in == NULL)
goto done;
- if (lmp->right[0] != '\0' &&
- rmp->left[0] != '\0')
+ if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
{
char *tp;
- tp = icpyalloc(lmp->right);
- tp = icatalloc(tp, rmp->left);
- lmp->in = enlist(lmp->in, tp, strlen(tp));
- free(tp);
+ tp = icpyalloc (lmp->right);
+ tp = icatalloc (tp, rmp->left);
+ lmp->in = enlist (lmp->in, tp, strlen (tp));
+ free (tp);
if (lmp->in == NULL)
goto done;
}
/* Left-hand */
if (lmp->is[0] != '\0')
{
- lmp->left = icatalloc(lmp->left,
- rmp->left);
+ lmp->left = icatalloc (lmp->left, rmp->left);
if (lmp->left == NULL)
goto done;
}
/* Right-hand */
if (rmp->is[0] == '\0')
lmp->right[0] = '\0';
- lmp->right = icatalloc(lmp->right, rmp->right);
+ lmp->right = icatalloc (lmp->right, rmp->right);
if (lmp->right == NULL)
goto done;
/* Guaranteed to be */
if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
{
- lmp->is = icatalloc(lmp->is, rmp->is);
+ lmp->is = icatalloc (lmp->is, rmp->is);
if (lmp->is == NULL)
goto done;
}
@@ -4030,59 +4051,55 @@ dfamust (struct dfa *d)
/* not on *my* shift */
goto done;
}
- else if (t >= CSET
- || !MBS_SUPPORT
- || t == ANYCHAR
- || t == MBCSET
- )
+ else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET)
{
/* easy enough */
- resetmust(mp);
+ resetmust (mp);
}
else
{
/* plain character */
- resetmust(mp);
+ resetmust (mp);
mp->is[0] = mp->left[0] = mp->right[0] = t;
mp->is[1] = mp->left[1] = mp->right[1] = '\0';
- mp->in = enlist(mp->in, mp->is, (size_t)1);
+ mp->in = enlist (mp->in, mp->is, (size_t) 1);
if (mp->in == NULL)
goto done;
}
break;
}
#ifdef DEBUG
- fprintf(stderr, " node: %zd:", ri);
- prtok(d->tokens[ri]);
- fprintf(stderr, "\n in:");
+ fprintf (stderr, " node: %zd:", ri);
+ prtok (d->tokens[ri]);
+ fprintf (stderr, "\n in:");
for (i = 0; mp->in[i]; ++i)
- fprintf(stderr, " \"%s\"", mp->in[i]);
- fprintf(stderr, "\n is: \"%s\"\n", mp->is);
- fprintf(stderr, " left: \"%s\"\n", mp->left);
- fprintf(stderr, " right: \"%s\"\n", mp->right);
+ fprintf (stderr, " \"%s\"", mp->in[i]);
+ fprintf (stderr, "\n is: \"%s\"\n", mp->is);
+ fprintf (stderr, " left: \"%s\"\n", mp->left);
+ fprintf (stderr, " right: \"%s\"\n", mp->right);
#endif
++mp;
}
- done:
- if (strlen(result))
+done:
+ if (strlen (result))
{
- MALLOC(dm, 1);
+ MALLOC (dm, 1);
dm->exact = exact;
- MALLOC(dm->must, strlen(result) + 1);
- strcpy(dm->must, result);
+ MALLOC (dm->must, strlen (result) + 1);
+ strcpy (dm->must, result);
dm->next = d->musts;
d->musts = dm;
}
mp = musts;
for (i = 0; i <= d->tindex; ++i)
{
- freelist(mp[i].in);
- free(mp[i].in);
- free(mp[i].left);
- free(mp[i].right);
- free(mp[i].is);
+ freelist (mp[i].in);
+ free (mp[i].in);
+ free (mp[i].left);
+ free (mp[i].right);
+ free (mp[i].is);
}
- free(mp);
+ free (mp);
}
struct dfa *
--
1.7.9.2.324.g1221
>From 074ae5bdbd42cfa3a5cf096833a2f55f3a2e36c3 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Thu, 1 Mar 2012 17:31:23 +0100
Subject: [PATCH 2/2] maint: manually correct some formatting in cpp
definitions
* src/dfa.c: Adjust formatting in cpp definitions.
---
src/dfa.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index ecceb3b..d7bb523 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -423,7 +423,7 @@ struct dfa
/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
specified context. */
#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
- SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, prev, curr)
+ SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
static void dfamust (struct dfa *dfa);
static void regexp (void);
@@ -814,7 +814,7 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
else \
{ \
wchar_t _wc; \
- cur_mb_len = mbrtowc(&_wc, lexptr, lexleft, &mbs); \
+ cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
if (cur_mb_len <= 0) \
{ \
cur_mb_len = 1; \
@@ -826,7 +826,7 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
lexptr += cur_mb_len; \
lexleft -= cur_mb_len; \
(wc) = _wc; \
- (c) = wctob(wc); \
+ (c) = wctob (wc); \
} \
} \
} while(0)
@@ -834,8 +834,8 @@ static unsigned char const *buf_end; /* reference to end in dfaexec(). */
# define FETCH(c, eoferr) \
do { \
wint_t wc; \
- FETCH_WC(c, wc, eoferr); \
- } while(0)
+ FETCH_WC (c, wc, eoferr); \
+ } while (0)
#else
/* Note that characters become unsigned here. */
@@ -2864,8 +2864,8 @@ build_state_zero (struct dfa *d)
++p; \
if ((char *) p >= end) \
{ \
- free(mblen_buf); \
- free(inputwcs); \
+ free (mblen_buf); \
+ free (inputwcs); \
*end = saved_end; \
return NULL; \
} \
--
1.7.9.2.324.g1221