/* mbcsets -- Handle multi-byte and/or locale-dependent sets of chars. Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2014 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */ /* Written June, 1988 by Mike Haertel Modified July, 1988 by Arthur David Olson to assist BMG speedups */ /* 2014: Created by "untangle" script, written by behoffski. ?? more stuff here */ #ifndef MBCSETS_H #define MBCSETS_H 1 /* Always import environment-specific configuration items first. */ #include #include "charclass.h" #include #include /* Define the multibyte-set descriptor as an opaque type. */ typedef struct mbcsets_set_struct mbcsets_set_t; /* Prepare module for operation. */ extern void mbcsets_initialise (void); /* Destroy all classes, plus any associated resources owned by the module. */ extern void mbcsets_destroy_module (void); /* Generate a new instance of a multibyte-character set descriptor. */ extern mbcsets_set_t * mbcsets_new (void); /* By default, classes match the specified characters. Regular expressions allow this sense to be inverted, usually by the convention of "^" being the first character of a bracketed class. By default, positive sense is selected; this function lets the user specify the sense, probably to specify inverted matching. */ extern void mbcsets_set_match_sense (mbcsets_set_t *mbc, bool invert); /* Individual wide characters. */ extern void mbcsets_add_wchar (mbcsets_set_t *mbc, wint_t wc); /* Add a list of wide characters (note: not wide integers). */ extern void mbcsets_add_wchar_list (mbcsets_set_t *mbc, size_t len, wchar_t *wc_list); /* Common character classes, e.g. alpha, digit, punct etc. */ extern void mbcsets_add_class (mbcsets_set_t *mbc, wctype_t wchar_class); /* Explicit character ranges. */ extern void mbcsets_add_range (mbcsets_set_t *mbc, wint_t beg, wint_t end); /* Receive an "in-work" character class, which may or may not have members. Mbcset takes ownership of this set, and, depending on the circumstances, either maintains it internally, or else copies its contents (if any) to its internals, and releases (abandons) the supplied set. This function must not applied to a set that has been completed. */ extern void mbcsets_receive_incomplete_charclass (mbcsets_set_t *mbc, charclass_t *ccl); /* Retrieve high-level information about the class, which is useful (in fsaparse) for deciding on how to deal with it. We are forced to provide significant query resources since we demand that the type internal remain opaque (even though the initial implementation may do a poor job of this effort). */ extern void mbcsets_get_characteristics (mbcsets_set_t *mbc, bool *p_invert, charclass_t **pp_charclass, size_t *p_nchars, size_t *p_nch_classes, size_t *p_nranges, size_t *p_nequivs, size_t *p_ncoll_elems); /* Copy wide char list to caller's work area. */ extern void mbcsets_get_chars (mbcsets_set_t *mbc, wchar_t *char_list); /* Mark a set as completed; the implementation may also analyse and optimise the set at this point (e.g. use charclasses to represent unibyte characters; merge overlapping ranges; remove the individual listing of a character if it is covered by a range, etc.) In addition, note that no further changes (e.g. receive another incomplete charclass) are allowed for this set, once "completed" is called. */ extern void mbcsets_completed (mbcsets_set_t *mbc); #endif /* MBCSETS_H */ /* vim:set shiftwidth=2: */