1169695Skan/* Definitions for data structures and routines for the regular
2169695Skan   expression library, version 0.12.
3169695Skan
4169695Skan   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993, 1995, 1996, 1997,
5169695Skan   1998, 2000, 2005 Free Software Foundation, Inc.
6169695Skan
7169695Skan   This file is part of the GNU C Library.  Its master source is NOT part of
8169695Skan   the C library, however.  The master source lives in /gd/gnu/lib.
9169695Skan
10169695Skan   The GNU C Library is free software; you can redistribute it and/or
11169695Skan   modify it under the terms of the GNU Lesser General Public
12169695Skan   License as published by the Free Software Foundation; either
13169695Skan   version 2.1 of the License, or (at your option) any later version.
14169695Skan
15169695Skan   The GNU C Library is distributed in the hope that it will be useful,
16169695Skan   but WITHOUT ANY WARRANTY; without even the implied warranty of
17169695Skan   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18169695Skan   Lesser General Public License for more details.
19169695Skan
20169695Skan   You should have received a copy of the GNU Lesser General Public
21169695Skan   License along with the GNU C Library; if not, write to the Free
22169695Skan   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23169695Skan   02110-1301 USA.  */
24169695Skan
25169695Skan#ifndef _REGEX_H
26169695Skan#define _REGEX_H 1
27169695Skan
28169695Skan/* Allow the use in C++ code.  */
29169695Skan#ifdef __cplusplus
30169695Skanextern "C" {
31169695Skan#endif
32169695Skan
33169695Skan/* POSIX says that <sys/types.h> must be included (by the caller) before
34169695Skan   <regex.h>.  */
35169695Skan
36169695Skan#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS
37169695Skan/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
38169695Skan   should be there.  */
39169695Skan# include <stddef.h>
40169695Skan#endif
41169695Skan
/* The following two types have to be signed and unsigned integer types
   wide enough to hold the value of a pointer.  For most ANSI compilers
   ptrdiff_t and size_t would likely be OK, but the size of those two
   types is 2 for Microsoft C.  Ugh...  */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;
48169695Skan
/* The following bits are used to determine the regexp syntax we
   recognize.  The set/not-set meanings are chosen so that Emacs syntax
   remains the value 0.  The bits are given in alphabetical order, and
   the definitions shifted by one from the previous bit; thus, when we
   add or remove a bit, only one other definition need change.  */
typedef unsigned long int reg_syntax_t;

/* If this bit is not set, then \ inside a bracket expression is literal.
   If set, then such a \ quotes the following character.  */
#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)

/* If this bit is not set, then + and ? are operators, and \+ and \? are
     literals.
   If set, then \+ and \? are operators and + and ? are literals.  */
#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)

/* If this bit is set, then character classes are supported.  They are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)

/* If this bit is set, then ^ and $ are always anchors (outside bracket
     expressions, of course).
   If this bit is not set, then it depends:
        ^  is an anchor if it is at the beginning of a regular
           expression or after an open-group or an alternation operator;
        $  is an anchor if it is at the end of a regular expression, or
           before a close-group or an alternation operator.

   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
   POSIX draft 11.2 says that * etc. in leading positions is undefined.
   We already implemented a previous draft which made those constructs
   invalid, though, so we haven't changed the code back.  */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)

/* If this bit is set, then special characters are always special
     regardless of where they are in the pattern.
   If this bit is not set, then special characters are special only in
     some contexts; otherwise they are ordinary.  Specifically,
     * + ? and intervals are only special when not after the beginning,
     open-group, or alternation operator.  */
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)

/* If this bit is set, then *, +, ?, and { cannot be first in an re or
     immediately after an alternation or begin-group operator.  */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)

/* If this bit is set, then . matches newline.
   If not set, then it doesn't.  */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)

/* If this bit is set, then . doesn't match NUL.
   If not set, then it does.  */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)

/* If this bit is set, nonmatching lists [^...] do not match newline.
   If not set, they do.  */
#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)

/* If this bit is set, either \{...\} or {...} defines an
     interval, depending on RE_NO_BK_BRACES.
   If not set, \{, \}, {, and } are literals.  */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)

/* If this bit is set, +, ? and | aren't recognized as operators.
   If not set, they are.  */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)

/* If this bit is set, newline is an alternation operator.
   If not set, newline is literal.  */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)

/* If this bit is set, then `{...}' defines an interval, and \{ and \}
     are literals.
   If not set, then `\{...\}' defines an interval.  */
#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)

/* If this bit is set, (...) defines a group, and \( and \) are literals.
   If not set, \(...\) defines a group, and ( and ) are literals.  */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)

/* If this bit is set, then \<digit> matches <digit>.
   If not set, then \<digit> is a back-reference.  */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)

/* If this bit is set, then | is an alternation operator, and \| is literal.
   If not set, then \| is an alternation operator, and | is literal.  */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)

/* If this bit is set, then a starting range point collating higher
     than the ending range point, as in [z-a], is invalid.
   If not set, then when the starting range point collates higher than
     the ending range point, the range is ignored.  */
#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)

/* If this bit is set, then an unmatched ) is ordinary.
   If not set, then an unmatched ) is invalid.  */
#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)

/* If this bit is set, succeed as soon as we match the whole pattern,
   without further backtracking.  */
#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)

/* If this bit is set, do not process the GNU regex operators.
   If not set, then the GNU regex operators are recognized.  */
#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)

/* If this bit is set, turn on internal regex debugging.
   If not set, and debugging was on, turn it off.
   This only works if regex.c is compiled -DDEBUG.
   We define this bit always, so that all that's needed to turn on
   debugging is to recompile regex.c; the calling code can always have
   this bit set, and it won't affect anything in the normal case.  */
#define RE_DEBUG (RE_NO_GNU_OPS << 1)

/* If this bit is set, a syntactically invalid interval is treated as
   a string of ordinary characters.  For example, the ERE 'a{1' is
   treated as 'a\{1'.  */
#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)

/* This global variable defines the particular regexp syntax to use (for
   some interfaces).  When a regexp is compiled, the syntax used is
   stored in the pattern buffer, so changing this does not affect
   already-compiled regexps.  */
extern reg_syntax_t re_syntax_options;
175169695Skan
/* Define combinations of the above bits for the standard possibilities.
   (The [[[ comments delimit what gets put into the Texinfo file, so
   don't delete them!)  */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0

#define RE_SYNTAX_AWK                                                   \
  (RE_BACKSLASH_ESCAPE_IN_LISTS   | RE_DOT_NOT_NULL                     \
   | RE_NO_BK_PARENS              | RE_NO_BK_REFS                       \
   | RE_NO_BK_VBAR                | RE_NO_EMPTY_RANGES                  \
   | RE_DOT_NEWLINE               | RE_CONTEXT_INDEP_ANCHORS            \
   | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)

#define RE_SYNTAX_GNU_AWK                                               \
  ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
   & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS))

#define RE_SYNTAX_POSIX_AWK                                             \
  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS              \
   | RE_INTERVALS           | RE_NO_GNU_OPS)

#define RE_SYNTAX_GREP                                                  \
  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES                         \
   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS                            \
   | RE_NEWLINE_ALT)

#define RE_SYNTAX_EGREP                                                 \
  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS                    \
   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE                    \
   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS                             \
   | RE_NO_BK_VBAR)

#define RE_SYNTAX_POSIX_EGREP                                           \
  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES                     \
   | RE_INVALID_INTERVAL_ORD)

/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC

#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC

/* Syntax bits common to both basic and extended POSIX regex syntax.  */
#define _RE_SYNTAX_POSIX_COMMON                                         \
  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL              \
   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)

#define RE_SYNTAX_POSIX_BASIC                                           \
  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)

/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
   isn't minimal, since other operators, such as \`, aren't disabled.  */
#define RE_SYNTAX_POSIX_MINIMAL_BASIC                                   \
  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)

#define RE_SYNTAX_POSIX_EXTENDED                                        \
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                  \
   | RE_CONTEXT_INDEP_OPS   | RE_NO_BK_BRACES                           \
   | RE_NO_BK_PARENS        | RE_NO_BK_VBAR                             \
   | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)

/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
   removed and RE_NO_BK_REFS is added.  */
#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED                                \
  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS                  \
   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES                           \
   | RE_NO_BK_PARENS        | RE_NO_BK_REFS                             \
   | RE_NO_BK_VBAR          | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */
245169695Skan
/* Maximum number of duplicates an interval can allow.  Some systems
   (erroneously) define this in other header files, but we want our
   value, so remove any previous define.  */
#ifdef RE_DUP_MAX
# undef RE_DUP_MAX
#endif
/* Written as a literal because, if sizeof(int) == 2, then
   ((1 << 15) - 1) overflows.  */
#define RE_DUP_MAX (0x7fff)
254169695Skan
255169695Skan
/* POSIX `cflags' bits (i.e., information for `regcomp').  */

/* If this bit is set, then use extended regular expression syntax.
   If not set, then use basic regular expression syntax.  */
#define REG_EXTENDED 1

/* If this bit is set, then ignore case when matching.
   If not set, then case is significant.  */
#define REG_ICASE (REG_EXTENDED << 1)

/* If this bit is set, then newline in the string is treated specially,
     as POSIX specifies: match-any-character operators and nonmatching
     lists do not match it, and the anchors ^ and $ match immediately
     after and before it.
   If not set, then newline is an ordinary character and anchors match
     only at the ends of the string.  */
#define REG_NEWLINE (REG_ICASE << 1)

/* If this bit is set, then report only success or fail in regexec.
   If not set, then returns differ between not matching and errors.  */
#define REG_NOSUB (REG_NEWLINE << 1)
274169695Skan
275169695Skan
/* POSIX `eflags' bits (i.e., information for regexec).  */

/* If this bit is set, then the beginning-of-line operator doesn't match
     the beginning of the string (presumably because it's not the
     beginning of a line).
   If not set, then the beginning-of-line operator does match the
     beginning of the string.  */
#define REG_NOTBOL 1

/* Like REG_NOTBOL, except for the end-of-line.  */
#define REG_NOTEOL (1 << 1)
287169695Skan
288169695Skan
/* Error codes returned by the regex routines.  If any error codes are
   removed, changed, or added, update the `re_error_msg' table in
   regex.c.  */
typedef enum
{
#ifdef _XOPEN_SOURCE
  REG_ENOSYS = -1,      /* This will never happen for this implementation.  */
#endif

  REG_NOERROR = 0,      /* Success.  */
  REG_NOMATCH,          /* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  REG_BADPAT,           /* Invalid pattern.  */
  REG_ECOLLATE,         /* Not implemented.  */
  REG_ECTYPE,           /* Invalid character class name.  */
  REG_EESCAPE,          /* Trailing backslash.  */
  REG_ESUBREG,          /* Invalid back reference.  */
  REG_EBRACK,           /* Unmatched left bracket.  */
  REG_EPAREN,           /* Parenthesis imbalance.  */
  REG_EBRACE,           /* Unmatched \{.  */
  REG_BADBR,            /* Invalid contents of \{\}.  */
  REG_ERANGE,           /* Invalid range end.  */
  REG_ESPACE,           /* Ran out of memory.  */
  REG_BADRPT,           /* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  REG_EEND,             /* Premature end.  */
  REG_ESIZE,            /* Compiled pattern bigger than 2^16 bytes.  */
  REG_ERPAREN           /* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;
320169695Skan
321169695Skan/* This data structure represents a compiled pattern.  Before calling
322169695Skan   the pattern compiler, the fields `buffer', `allocated', `fastmap',
323169695Skan   `translate', and `no_sub' can be set.  After the pattern has been
324169695Skan   compiled, the `re_nsub' field is available.  All other fields are
325169695Skan   private to the regex routines.  */
326169695Skan
327169695Skan#ifndef RE_TRANSLATE_TYPE
328169695Skan# define RE_TRANSLATE_TYPE char *
329169695Skan#endif
330169695Skan
331169695Skanstruct re_pattern_buffer
332169695Skan{
333169695Skan/* [[[begin pattern_buffer]]] */
334169695Skan	/* Space that holds the compiled pattern.  It is declared as
335169695Skan          `unsigned char *' because its elements are
336169695Skan           sometimes used as array indexes.  */
337169695Skan  unsigned char *buffer;
338169695Skan
339169695Skan	/* Number of bytes to which `buffer' points.  */
340169695Skan  unsigned long int allocated;
341169695Skan
342169695Skan	/* Number of bytes actually used in `buffer'.  */
343169695Skan  unsigned long int used;
344169695Skan
345169695Skan        /* Syntax setting with which the pattern was compiled.  */
346169695Skan  reg_syntax_t syntax;
347169695Skan
348169695Skan        /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
349169695Skan           the fastmap, if there is one, to skip over impossible
350169695Skan           starting points for matches.  */
351169695Skan  char *fastmap;
352169695Skan
353169695Skan        /* Either a translate table to apply to all characters before
354169695Skan           comparing them, or zero for no translation.  The translation
355169695Skan           is applied to a pattern when it is compiled and to a string
356169695Skan           when it is matched.  */
357169695Skan  RE_TRANSLATE_TYPE translate;
358169695Skan
359169695Skan	/* Number of subexpressions found by the compiler.  */
360169695Skan  size_t re_nsub;
361169695Skan
362169695Skan        /* Zero if this pattern cannot match the empty string, one else.
363169695Skan           Well, in truth it's used only in `re_search_2', to see
364169695Skan           whether or not we should use the fastmap, so we don't set
365169695Skan           this absolutely perfectly; see `re_compile_fastmap' (the
366169695Skan           `duplicate' case).  */
367169695Skan  unsigned can_be_null : 1;
368169695Skan
369169695Skan        /* If REGS_UNALLOCATED, allocate space in the `regs' structure
370169695Skan             for `max (RE_NREGS, re_nsub + 1)' groups.
371169695Skan           If REGS_REALLOCATE, reallocate space if necessary.
372169695Skan           If REGS_FIXED, use what's there.  */
373169695Skan#define REGS_UNALLOCATED 0
374169695Skan#define REGS_REALLOCATE 1
375169695Skan#define REGS_FIXED 2
376169695Skan  unsigned regs_allocated : 2;
377169695Skan
378169695Skan        /* Set to zero when `regex_compile' compiles a pattern; set to one
379169695Skan           by `re_compile_fastmap' if it updates the fastmap.  */
380169695Skan  unsigned fastmap_accurate : 1;
381169695Skan
382169695Skan        /* If set, `re_match_2' does not return information about
383169695Skan           subexpressions.  */
384169695Skan  unsigned no_sub : 1;
385169695Skan
386169695Skan        /* If set, a beginning-of-line anchor doesn't match at the
387169695Skan           beginning of the string.  */
388169695Skan  unsigned not_bol : 1;
389169695Skan
390169695Skan        /* Similarly for an end-of-line anchor.  */
391169695Skan  unsigned not_eol : 1;
392169695Skan
393169695Skan        /* If true, an anchor at a newline matches.  */
394169695Skan  unsigned newline_anchor : 1;
395169695Skan
396169695Skan/* [[[end pattern_buffer]]] */
397169695Skan};
398169695Skan
399169695Skantypedef struct re_pattern_buffer regex_t;
400169695Skan
/* Type for byte offsets within the string.  POSIX mandates this.  */
typedef int regoff_t;


/* This is the structure we store register match data in.  See
   regex.texinfo for a full description of what registers match.  */
struct re_registers
{
  unsigned num_regs;
  regoff_t *start;
  regoff_t *end;
};


/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   `re_match_2' returns information about at least this many registers
   the first time a `regs' structure is passed.  */
#ifndef RE_NREGS
# define RE_NREGS 30
#endif


/* POSIX specification for registers.  Aside from using different names
   than `re_registers', POSIX uses an array of structures instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;
431169695Skan
432169695Skan/* Declarations for routines.  */
433169695Skan
434169695Skan/* To avoid duplicating every routine declaration -- once with a
435169695Skan   prototype (if we are ANSI), and once without (if we aren't) -- we
436169695Skan   use the following macro to declare argument types.  This
437169695Skan   unfortunately clutters up the declarations a bit, but I think it's
438169695Skan   worth it.  */
439169695Skan
440169695Skan/* Sets the current default syntax to SYNTAX, and return the old syntax.
441169695Skan   You can also simply assign to the `re_syntax_options' variable.  */
442169695Skanextern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
443169695Skan
444169695Skan/* Compile the regular expression PATTERN, with length LENGTH
445169695Skan   and syntax given by the global `re_syntax_options', into the buffer
446169695Skan   BUFFER.  Return NULL if successful, and an error string if not.  */
447169695Skanextern const char *re_compile_pattern (const char *pattern, size_t length,
448169695Skan                                       struct re_pattern_buffer *buffer);
449169695Skan
450169695Skan
451169695Skan/* Compile a fastmap for the compiled pattern in BUFFER; used to
452169695Skan   accelerate searches.  Return 0 if successful and -2 if was an
453169695Skan   internal error.  */
454169695Skanextern int re_compile_fastmap (struct re_pattern_buffer *buffer);
455169695Skan
456169695Skan
457169695Skan/* Search in the string STRING (with length LENGTH) for the pattern
458169695Skan   compiled into BUFFER.  Start searching at position START, for RANGE
459169695Skan   characters.  Return the starting position of the match, -1 for no
460169695Skan   match, or -2 for an internal error.  Also return register
461169695Skan   information in REGS (if REGS and BUFFER->no_sub are nonzero).  */
462169695Skanextern int re_search (struct re_pattern_buffer *buffer, const char *string,
463169695Skan                      int length, int start, int range,
464169695Skan                      struct re_registers *regs);
465169695Skan
466169695Skan
467169695Skan/* Like `re_search', but search in the concatenation of STRING1 and
468169695Skan   STRING2.  Also, stop searching at index START + STOP.  */
469169695Skanextern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1,
470169695Skan                        int length1, const char *string2, int length2,
471169695Skan                        int start, int range, struct re_registers *regs,
472169695Skan                        int stop);
473169695Skan
474169695Skan
475169695Skan/* Like `re_search', but return how many characters in STRING the regexp
476169695Skan   in BUFFER matched, starting at position START.  */
477169695Skanextern int re_match (struct re_pattern_buffer *buffer, const char *string,
478169695Skan                     int length, int start, struct re_registers *regs);
479169695Skan
480169695Skan
481169695Skan/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
482169695Skanextern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1,
483169695Skan                       int length1, const char *string2, int length2,
484169695Skan                       int start, struct re_registers *regs, int stop);
485169695Skan
486169695Skan
487169695Skan/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
488169695Skan   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
489169695Skan   for recording register information.  STARTS and ENDS must be
490169695Skan   allocated with malloc, and must each be at least `NUM_REGS * sizeof
491169695Skan   (regoff_t)' bytes long.
492169695Skan
493169695Skan   If NUM_REGS == 0, then subsequent matches should allocate their own
494169695Skan   register data.
495169695Skan
496169695Skan   Unless this function is called, the first search or match using
497169695Skan   PATTERN_BUFFER will allocate its own register data, without
498169695Skan   freeing the old data.  */
499169695Skanextern void re_set_registers (struct re_pattern_buffer *buffer,
500169695Skan                              struct re_registers *regs,
501169695Skan                              unsigned num_regs, regoff_t *starts,
502169695Skan                              regoff_t *ends);
503169695Skan
504169695Skan#if defined _REGEX_RE_COMP || defined _LIBC
505169695Skan# ifndef _CRAY
506169695Skan/* 4.2 bsd compatibility.  */
507169695Skanextern char *re_comp (const char *);
508169695Skanextern int re_exec (const char *);
509169695Skan# endif
510169695Skan#endif
511169695Skan
/* GCC 2.95 and later have "__restrict"; C99 compilers have
   "restrict", and "configure" may have defined "restrict".
   For any other compiler, make "__restrict" expand to "restrict" when
   that is usable and to nothing otherwise.  */
#ifndef __restrict
# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__))
#  if defined restrict || 199901L <= __STDC_VERSION__
#   define __restrict restrict
#  else
#   define __restrict
#  endif
# endif
#endif

/* GCC 3.1 and later support declaring arrays as non-overlapping
   using the syntax array_name[restrict].  Older compilers, and G++,
   get an empty definition.  */
#ifndef __restrict_arr
# if ! (3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) || defined (__GNUG__)
#  define __restrict_arr
# else
#  define __restrict_arr __restrict
# endif
#endif
533169695Skan
534169695Skan/* POSIX compatibility.  */
535169695Skanextern int regcomp (regex_t *__restrict __preg,
536169695Skan                    const char *__restrict __pattern,
537169695Skan                    int __cflags);
538169695Skan
539169695Skan#if (__GNUC__)
540169695Skan__extension__
541169695Skan#endif
542169695Skanextern int regexec (const regex_t *__restrict __preg,
543169695Skan                    const char *__restrict __string, size_t __nmatch,
544169695Skan                    regmatch_t __pmatch[__restrict_arr],
545169695Skan                    int __eflags);
546169695Skan
547169695Skanextern size_t regerror (int __errcode, const regex_t *__preg,
548169695Skan                        char *__errbuf, size_t __errbuf_size);
549169695Skan
550169695Skanextern void regfree (regex_t *__preg);
551169695Skan
552169695Skan
553169695Skan#ifdef __cplusplus
554169695Skan}
555169695Skan#endif	/* C++ */
556169695Skan
557169695Skan#endif /* regex.h */
558169695Skan
559169695Skan/*
560169695SkanLocal variables:
561169695Skanmake-backup-files: t
562169695Skanversion-control: t
563169695Skantrim-versions-without-asking: nil
564169695SkanEnd:
565169695Skan*/
566