1/* Extended regular expression matching and search library.
2   Copyright (C) 2002-2005,2007,2009,2010,2011 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <http://www.gnu.org/licenses/>.  */
19
20static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21				     int n) internal_function;
22static void match_ctx_clean (re_match_context_t *mctx) internal_function;
23static void match_ctx_free (re_match_context_t *cache) internal_function;
24static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
25					  int str_idx, int from, int to)
26     internal_function;
27static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
28     internal_function;
29static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
30					   int str_idx) internal_function;
31static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32						   int node, int str_idx)
33     internal_function;
34static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35			   re_dfastate_t **limited_sts, int last_node,
36			   int last_str_idx)
37     internal_function;
38static reg_errcode_t re_search_internal (const regex_t *preg,
39					 const char *string, int length,
40					 int start, int range, int stop,
41					 size_t nmatch, regmatch_t pmatch[],
42					 int eflags) internal_function;
43static int re_search_2_stub (struct re_pattern_buffer *bufp,
44			     const char *string1, int length1,
45			     const char *string2, int length2,
46			     int start, int range, struct re_registers *regs,
47			     int stop, int ret_len) internal_function;
48static int re_search_stub (struct re_pattern_buffer *bufp,
49			   const char *string, int length, int start,
50			   int range, int stop, struct re_registers *regs,
51			   int ret_len) internal_function;
52static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
53			      int nregs, int regs_allocated) internal_function;
54static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
55     internal_function;
56static int check_matching (re_match_context_t *mctx, int fl_longest_match,
57			   int *p_match_first) internal_function;
58static int check_halt_state_context (const re_match_context_t *mctx,
59				     const re_dfastate_t *state, int idx)
60     internal_function;
61static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
62			 regmatch_t *prev_idx_match, int cur_node,
63			 int cur_idx, int nmatch) internal_function;
64static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
65				      int str_idx, int dest_node, int nregs,
66				      regmatch_t *regs,
67				      re_node_set *eps_via_nodes)
68     internal_function;
69static reg_errcode_t set_regs (const regex_t *preg,
70			       const re_match_context_t *mctx,
71			       size_t nmatch, regmatch_t *pmatch,
72			       int fl_backtrack) internal_function;
73static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
74     internal_function;
75
76#ifdef RE_ENABLE_I18N
77static int sift_states_iter_mb (const re_match_context_t *mctx,
78				re_sift_context_t *sctx,
79				int node_idx, int str_idx, int max_str_idx)
80     internal_function;
81#endif /* RE_ENABLE_I18N */
82static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
83					   re_sift_context_t *sctx)
84     internal_function;
85static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
86					  re_sift_context_t *sctx, int str_idx,
87					  re_node_set *cur_dest)
88     internal_function;
89static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
90					      re_sift_context_t *sctx,
91					      int str_idx,
92					      re_node_set *dest_nodes)
93     internal_function;
94static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
95					    re_node_set *dest_nodes,
96					    const re_node_set *candidates)
97     internal_function;
98static int check_dst_limits (const re_match_context_t *mctx,
99			     re_node_set *limits,
100			     int dst_node, int dst_idx, int src_node,
101			     int src_idx) internal_function;
102static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
103					int boundaries, int subexp_idx,
104					int from_node, int bkref_idx)
105     internal_function;
106static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
107				      int limit, int subexp_idx,
108				      int node, int str_idx,
109				      int bkref_idx) internal_function;
110static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
111					  re_node_set *dest_nodes,
112					  const re_node_set *candidates,
113					  re_node_set *limits,
114					  struct re_backref_cache_entry *bkref_ents,
115					  int str_idx) internal_function;
116static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
117					re_sift_context_t *sctx,
118					int str_idx, const re_node_set *candidates)
119     internal_function;
120static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
121					re_dfastate_t **dst,
122					re_dfastate_t **src, int num)
123     internal_function;
124static re_dfastate_t *find_recover_state (reg_errcode_t *err,
125					 re_match_context_t *mctx) internal_function;
126static re_dfastate_t *transit_state (reg_errcode_t *err,
127				     re_match_context_t *mctx,
128				     re_dfastate_t *state) internal_function;
129static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
130					    re_match_context_t *mctx,
131					    re_dfastate_t *next_state)
132     internal_function;
133static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
134						re_node_set *cur_nodes,
135						int str_idx) internal_function;
136#if 0
137static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
138					re_match_context_t *mctx,
139					re_dfastate_t *pstate)
140     internal_function;
141#endif
142#ifdef RE_ENABLE_I18N
143static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
144				       re_dfastate_t *pstate)
145     internal_function;
146#endif /* RE_ENABLE_I18N */
147static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
148					  const re_node_set *nodes)
149     internal_function;
150static reg_errcode_t get_subexp (re_match_context_t *mctx,
151				 int bkref_node, int bkref_str_idx)
152     internal_function;
153static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
154				     const re_sub_match_top_t *sub_top,
155				     re_sub_match_last_t *sub_last,
156				     int bkref_node, int bkref_str)
157     internal_function;
158static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
159			     int subexp_idx, int type) internal_function;
160static reg_errcode_t check_arrival (re_match_context_t *mctx,
161				    state_array_t *path, int top_node,
162				    int top_str, int last_node, int last_str,
163				    int type) internal_function;
164static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
165						   int str_idx,
166						   re_node_set *cur_nodes,
167						   re_node_set *next_nodes)
168     internal_function;
169static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
170					       re_node_set *cur_nodes,
171					       int ex_subexp, int type)
172     internal_function;
173static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
174						   re_node_set *dst_nodes,
175						   int target, int ex_subexp,
176						   int type) internal_function;
177static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
178					 re_node_set *cur_nodes, int cur_str,
179					 int subexp_num, int type)
180     internal_function;
181static int build_trtable (const re_dfa_t *dfa,
182			  re_dfastate_t *state) internal_function;
183#ifdef RE_ENABLE_I18N
184static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
185				    const re_string_t *input, int idx)
186     internal_function;
187# ifdef _LIBC
188static unsigned int find_collation_sequence_value (const unsigned char *mbs,
189						   size_t name_len)
190     internal_function;
191# endif /* _LIBC */
192#endif /* RE_ENABLE_I18N */
193static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
194				       const re_dfastate_t *state,
195				       re_node_set *states_node,
196				       bitset_t *states_ch) internal_function;
197static int check_node_accept (const re_match_context_t *mctx,
198			      const re_token_t *node, int idx)
199     internal_function;
200static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
201     internal_function;
202
203/* Entry point for POSIX code.  */
204
205/* regexec searches for a given pattern, specified by PREG, in the
206   string STRING.
207
208   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
209   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
210   least NMATCH elements, and we set them to the offsets of the
211   corresponding matched substrings.
212
213   EFLAGS specifies `execution flags' which affect matching: if
214   REG_NOTBOL is set, then ^ does not match at the beginning of the
215   string; if REG_NOTEOL is set, then $ does not match at the end.
216
217   We return 0 if we find a match and REG_NOMATCH if not.  */
218
219int
220regexec (preg, string, nmatch, pmatch, eflags)
221    const regex_t *__restrict preg;
222    const char *__restrict string;
223    size_t nmatch;
224    regmatch_t pmatch[];
225    int eflags;
226{
227  reg_errcode_t err;
228  int start, length;
229  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
230
231  if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
232    return REG_BADPAT;
233
234  if (eflags & REG_STARTEND)
235    {
236      start = pmatch[0].rm_so;
237      length = pmatch[0].rm_eo;
238    }
239  else
240    {
241      start = 0;
242      length = strlen (string);
243    }
244
245  __libc_lock_lock (dfa->lock);
246  if (preg->no_sub)
247    err = re_search_internal (preg, string, length, start, length - start,
248			      length, 0, NULL, eflags);
249  else
250    err = re_search_internal (preg, string, length, start, length - start,
251			      length, nmatch, pmatch, eflags);
252  __libc_lock_unlock (dfa->lock);
253  return err != REG_NOERROR;
254}
255
256#ifdef _LIBC
257# include <shlib-compat.h>
258versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
259
260# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
261__typeof__ (__regexec) __compat_regexec;
262
263int
264attribute_compat_text_section
265__compat_regexec (const regex_t *__restrict preg,
266		  const char *__restrict string, size_t nmatch,
267		  regmatch_t pmatch[], int eflags)
268{
269  return regexec (preg, string, nmatch, pmatch,
270		  eflags & (REG_NOTBOL | REG_NOTEOL));
271}
272compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
273# endif
274#endif
275
276/* Entry points for GNU code.  */
277
278/* re_match, re_search, re_match_2, re_search_2
279
280   The former two functions operate on STRING with length LENGTH,
281   while the later two operate on concatenation of STRING1 and STRING2
282   with lengths LENGTH1 and LENGTH2, respectively.
283
284   re_match() matches the compiled pattern in BUFP against the string,
285   starting at index START.
286
287   re_search() first tries matching at index START, then it tries to match
288   starting from index START + 1, and so on.  The last start position tried
289   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
290   way as re_match().)
291
292   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
293   the first STOP characters of the concatenation of the strings should be
294   concerned.
295
296   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
297   and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
298   computed relative to the concatenation, not relative to the individual
299   strings.)
300
301   On success, re_match* functions return the length of the match, re_search*
302   return the position of the start of the match.  Return value -1 means no
303   match was found and -2 indicates an internal error.  */
304
305int
306re_match (bufp, string, length, start, regs)
307    struct re_pattern_buffer *bufp;
308    const char *string;
309    int length, start;
310    struct re_registers *regs;
311{
312  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
313}
314#ifdef _LIBC
315weak_alias (__re_match, re_match)
316#endif
317
318int
319re_search (bufp, string, length, start, range, regs)
320    struct re_pattern_buffer *bufp;
321    const char *string;
322    int length, start, range;
323    struct re_registers *regs;
324{
325  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
326}
327#ifdef _LIBC
328weak_alias (__re_search, re_search)
329#endif
330
331int
332re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
333    struct re_pattern_buffer *bufp;
334    const char *string1, *string2;
335    int length1, length2, start, stop;
336    struct re_registers *regs;
337{
338  return re_search_2_stub (bufp, string1, length1, string2, length2,
339			   start, 0, regs, stop, 1);
340}
341#ifdef _LIBC
342weak_alias (__re_match_2, re_match_2)
343#endif
344
345int
346re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
347    struct re_pattern_buffer *bufp;
348    const char *string1, *string2;
349    int length1, length2, start, range, stop;
350    struct re_registers *regs;
351{
352  return re_search_2_stub (bufp, string1, length1, string2, length2,
353			   start, range, regs, stop, 0);
354}
355#ifdef _LIBC
356weak_alias (__re_search_2, re_search_2)
357#endif
358
359static int
360re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
361		  stop, ret_len)
362    struct re_pattern_buffer *bufp;
363    const char *string1, *string2;
364    int length1, length2, start, range, stop, ret_len;
365    struct re_registers *regs;
366{
367  const char *str;
368  int rval;
369  int len = length1 + length2;
370  char *s = NULL;
371
372  if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
373    return -2;
374
375  /* Concatenate the strings.  */
376  if (length2 > 0)
377    if (length1 > 0)
378      {
379	s = re_malloc (char, len);
380
381	if (BE (s == NULL, 0))
382	  return -2;
383#ifdef _LIBC
384	memcpy (__mempcpy (s, string1, length1), string2, length2);
385#else
386	memcpy (s, string1, length1);
387	memcpy (s + length1, string2, length2);
388#endif
389	str = s;
390      }
391    else
392      str = string2;
393  else
394    str = string1;
395
396  rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);
397  re_free (s);
398  return rval;
399}
400
401/* The parameters have the same meaning as those of re_search.
402   Additional parameters:
403   If RET_LEN is nonzero the length of the match is returned (re_match style);
404   otherwise the position of the match is returned.  */
405
406static int
407re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
408    struct re_pattern_buffer *bufp;
409    const char *string;
410    int length, start, range, stop, ret_len;
411    struct re_registers *regs;
412{
413  reg_errcode_t result;
414  regmatch_t *pmatch;
415  int nregs, rval;
416  int eflags = 0;
417  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
418
419  /* Check for out-of-range.  */
420  if (BE (start < 0 || start > length, 0))
421    return -1;
422  if (BE (start + range > length, 0))
423    range = length - start;
424  else if (BE (start + range < 0, 0))
425    range = -start;
426
427  __libc_lock_lock (dfa->lock);
428
429  eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
430  eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
431
432  /* Compile fastmap if we haven't yet.  */
433  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
434    re_compile_fastmap (bufp);
435
436  if (BE (bufp->no_sub, 0))
437    regs = NULL;
438
439  /* We need at least 1 register.  */
440  if (regs == NULL)
441    nregs = 1;
442  else if (BE (bufp->regs_allocated == REGS_FIXED &&
443	       regs->num_regs < bufp->re_nsub + 1, 0))
444    {
445      nregs = regs->num_regs;
446      if (BE (nregs < 1, 0))
447	{
448	  /* Nothing can be copied to regs.  */
449	  regs = NULL;
450	  nregs = 1;
451	}
452    }
453  else
454    nregs = bufp->re_nsub + 1;
455  pmatch = re_malloc (regmatch_t, nregs);
456  if (BE (pmatch == NULL, 0))
457    {
458      rval = -2;
459      goto out;
460    }
461
462  result = re_search_internal (bufp, string, length, start, range, stop,
463			       nregs, pmatch, eflags);
464
465  rval = 0;
466
467  /* I hope we needn't fill ther regs with -1's when no match was found.  */
468  if (result != REG_NOERROR)
469    rval = -1;
470  else if (regs != NULL)
471    {
472      /* If caller wants register contents data back, copy them.  */
473      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
474					   bufp->regs_allocated);
475      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
476	rval = -2;
477    }
478
479  if (BE (rval == 0, 1))
480    {
481      if (ret_len)
482	{
483	  assert (pmatch[0].rm_so == start);
484	  rval = pmatch[0].rm_eo - start;
485	}
486      else
487	rval = pmatch[0].rm_so;
488    }
489  re_free (pmatch);
490 out:
491  __libc_lock_unlock (dfa->lock);
492  return rval;
493}
494
495static unsigned
496re_copy_regs (regs, pmatch, nregs, regs_allocated)
497    struct re_registers *regs;
498    regmatch_t *pmatch;
499    int nregs, regs_allocated;
500{
501  int rval = REGS_REALLOCATE;
502  int i;
503  int need_regs = nregs + 1;
504  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
505     uses.  */
506
507  /* Have the register data arrays been allocated?  */
508  if (regs_allocated == REGS_UNALLOCATED)
509    { /* No.  So allocate them with malloc.  */
510      regs->start = re_malloc (regoff_t, need_regs);
511      if (BE (regs->start == NULL, 0))
512	return REGS_UNALLOCATED;
513      regs->end = re_malloc (regoff_t, need_regs);
514      if (BE (regs->end == NULL, 0))
515	{
516	  re_free (regs->start);
517	  return REGS_UNALLOCATED;
518	}
519      regs->num_regs = need_regs;
520    }
521  else if (regs_allocated == REGS_REALLOCATE)
522    { /* Yes.  If we need more elements than were already
523	 allocated, reallocate them.  If we need fewer, just
524	 leave it alone.  */
525      if (BE (need_regs > regs->num_regs, 0))
526	{
527	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
528	  regoff_t *new_end;
529	  if (BE (new_start == NULL, 0))
530	    return REGS_UNALLOCATED;
531	  new_end = re_realloc (regs->end, regoff_t, need_regs);
532	  if (BE (new_end == NULL, 0))
533	    {
534	      re_free (new_start);
535	      return REGS_UNALLOCATED;
536	    }
537	  regs->start = new_start;
538	  regs->end = new_end;
539	  regs->num_regs = need_regs;
540	}
541    }
542  else
543    {
544      assert (regs_allocated == REGS_FIXED);
545      /* This function may not be called with REGS_FIXED and nregs too big.  */
546      assert (regs->num_regs >= nregs);
547      rval = REGS_FIXED;
548    }
549
550  /* Copy the regs.  */
551  for (i = 0; i < nregs; ++i)
552    {
553      regs->start[i] = pmatch[i].rm_so;
554      regs->end[i] = pmatch[i].rm_eo;
555    }
556  for ( ; i < regs->num_regs; ++i)
557    regs->start[i] = regs->end[i] = -1;
558
559  return rval;
560}
561
562/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
563   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
564   this memory for recording register information.  STARTS and ENDS
565   must be allocated using the malloc library routine, and must each
566   be at least NUM_REGS * sizeof (regoff_t) bytes long.
567
568   If NUM_REGS == 0, then subsequent matches should allocate their own
569   register data.
570
571   Unless this function is called, the first search or match using
572   PATTERN_BUFFER will allocate its own register data, without
573   freeing the old data.  */
574
575void
576re_set_registers (bufp, regs, num_regs, starts, ends)
577    struct re_pattern_buffer *bufp;
578    struct re_registers *regs;
579    unsigned num_regs;
580    regoff_t *starts, *ends;
581{
582  if (num_regs)
583    {
584      bufp->regs_allocated = REGS_REALLOCATE;
585      regs->num_regs = num_regs;
586      regs->start = starts;
587      regs->end = ends;
588    }
589  else
590    {
591      bufp->regs_allocated = REGS_UNALLOCATED;
592      regs->num_regs = 0;
593      regs->start = regs->end = (regoff_t *) 0;
594    }
595}
596#ifdef _LIBC
597weak_alias (__re_set_registers, re_set_registers)
598#endif
599
600/* Entry points compatible with 4.2 BSD regex library.  We don't define
601   them unless specifically requested.  */
602
603#if defined _REGEX_RE_COMP || defined _LIBC
604int
605# ifdef _LIBC
606weak_function
607# endif
608re_exec (s)
609     const char *s;
610{
611  return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
612}
613#endif /* _REGEX_RE_COMP */
614
615/* Internal entry point.  */
616
617/* Searches for a compiled pattern PREG in the string STRING, whose
618   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
619   mingings with regexec.  START, and RANGE have the same meanings
620   with re_search.
621   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
622   otherwise return the error code.
623   Note: We assume front end functions already check ranges.
624   (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
625
626static reg_errcode_t
627__attribute_warn_unused_result__
628re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
629		    eflags)
630    const regex_t *preg;
631    const char *string;
632    int length, start, range, stop, eflags;
633    size_t nmatch;
634    regmatch_t pmatch[];
635{
636  reg_errcode_t err;
637  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
638  int left_lim, right_lim, incr;
639  int fl_longest_match, match_first, match_kind, match_last = -1;
640  int extra_nmatch;
641  int sb, ch;
642#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
643  re_match_context_t mctx = { .dfa = dfa };
644#else
645  re_match_context_t mctx;
646#endif
647  char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
648		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
649  RE_TRANSLATE_TYPE t = preg->translate;
650
651#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
652  memset (&mctx, '\0', sizeof (re_match_context_t));
653  mctx.dfa = dfa;
654#endif
655
656  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
657  nmatch -= extra_nmatch;
658
659  /* Check if the DFA haven't been compiled.  */
660  if (BE (preg->used == 0 || dfa->init_state == NULL
661	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
662	  || dfa->init_state_begbuf == NULL, 0))
663    return REG_NOMATCH;
664
665#ifdef DEBUG
666  /* We assume front-end functions already check them.  */
667  assert (start + range >= 0 && start + range <= length);
668#endif
669
670  /* If initial states with non-begbuf contexts have no elements,
671     the regex must be anchored.  If preg->newline_anchor is set,
672     we'll never use init_state_nl, so do not check it.  */
673  if (dfa->init_state->nodes.nelem == 0
674      && dfa->init_state_word->nodes.nelem == 0
675      && (dfa->init_state_nl->nodes.nelem == 0
676	  || !preg->newline_anchor))
677    {
678      if (start != 0 && start + range != 0)
679	return REG_NOMATCH;
680      start = range = 0;
681    }
682
683  /* We must check the longest matching, if nmatch > 0.  */
684  fl_longest_match = (nmatch != 0 || dfa->nbackref);
685
686  err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
687			    preg->translate, preg->syntax & RE_ICASE, dfa);
688  if (BE (err != REG_NOERROR, 0))
689    goto free_return;
690  mctx.input.stop = stop;
691  mctx.input.raw_stop = stop;
692  mctx.input.newline_anchor = preg->newline_anchor;
693
694  err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
695  if (BE (err != REG_NOERROR, 0))
696    goto free_return;
697
698  /* We will log all the DFA states through which the dfa pass,
699     if nmatch > 1, or this dfa has "multibyte node", which is a
700     back-reference or a node which can accept multibyte character or
701     multi character collating element.  */
702  if (nmatch > 1 || dfa->has_mb_node)
703    {
704      /* Avoid overflow.  */
705      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
706	{
707	  err = REG_ESPACE;
708	  goto free_return;
709	}
710
711      mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
712      if (BE (mctx.state_log == NULL, 0))
713	{
714	  err = REG_ESPACE;
715	  goto free_return;
716	}
717    }
718  else
719    mctx.state_log = NULL;
720
721  match_first = start;
722  mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
723			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
724
725  /* Check incrementally whether of not the input string match.  */
726  incr = (range < 0) ? -1 : 1;
727  left_lim = (range < 0) ? start + range : start;
728  right_lim = (range < 0) ? start : start + range;
729  sb = dfa->mb_cur_max == 1;
730  match_kind =
731    (fastmap
732     ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
733	| (range >= 0 ? 2 : 0)
734	| (t != NULL ? 1 : 0))
735     : 8);
736
737  for (;; match_first += incr)
738    {
739      err = REG_NOMATCH;
740      if (match_first < left_lim || right_lim < match_first)
741	goto free_return;
742
743      /* Advance as rapidly as possible through the string, until we
744	 find a plausible place to start matching.  This may be done
745	 with varying efficiency, so there are various possibilities:
746	 only the most common of them are specialized, in order to
747	 save on code size.  We use a switch statement for speed.  */
748      switch (match_kind)
749	{
750	case 8:
751	  /* No fastmap.  */
752	  break;
753
754	case 7:
755	  /* Fastmap with single-byte translation, match forward.  */
756	  while (BE (match_first < right_lim, 1)
757		 && !fastmap[t[(unsigned char) string[match_first]]])
758	    ++match_first;
759	  goto forward_match_found_start_or_reached_end;
760
761	case 6:
762	  /* Fastmap without translation, match forward.  */
763	  while (BE (match_first < right_lim, 1)
764		 && !fastmap[(unsigned char) string[match_first]])
765	    ++match_first;
766
767	forward_match_found_start_or_reached_end:
768	  if (BE (match_first == right_lim, 0))
769	    {
770	      ch = match_first >= length
771		       ? 0 : (unsigned char) string[match_first];
772	      if (!fastmap[t ? t[ch] : ch])
773		goto free_return;
774	    }
775	  break;
776
777	case 4:
778	case 5:
779	  /* Fastmap without multi-byte translation, match backwards.  */
780	  while (match_first >= left_lim)
781	    {
782	      ch = match_first >= length
783		       ? 0 : (unsigned char) string[match_first];
784	      if (fastmap[t ? t[ch] : ch])
785		break;
786	      --match_first;
787	    }
788	  if (match_first < left_lim)
789	    goto free_return;
790	  break;
791
792	default:
793	  /* In this case, we can't determine easily the current byte,
794	     since it might be a component byte of a multibyte
795	     character.  Then we use the constructed buffer instead.  */
796	  for (;;)
797	    {
798	      /* If MATCH_FIRST is out of the valid range, reconstruct the
799		 buffers.  */
800	      unsigned int offset = match_first - mctx.input.raw_mbs_idx;
801	      if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
802		{
803		  err = re_string_reconstruct (&mctx.input, match_first,
804					       eflags);
805		  if (BE (err != REG_NOERROR, 0))
806		    goto free_return;
807
808		  offset = match_first - mctx.input.raw_mbs_idx;
809		}
810	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
811		 Note that MATCH_FIRST must not be smaller than 0.  */
812	      ch = (match_first >= length
813		    ? 0 : re_string_byte_at (&mctx.input, offset));
814	      if (fastmap[ch])
815		break;
816	      match_first += incr;
817	      if (match_first < left_lim || match_first > right_lim)
818		{
819		  err = REG_NOMATCH;
820		  goto free_return;
821		}
822	    }
823	  break;
824	}
825
826      /* Reconstruct the buffers so that the matcher can assume that
827	 the matching starts from the beginning of the buffer.  */
828      err = re_string_reconstruct (&mctx.input, match_first, eflags);
829      if (BE (err != REG_NOERROR, 0))
830	goto free_return;
831
832#ifdef RE_ENABLE_I18N
833     /* Don't consider this char as a possible match start if it part,
834	yet isn't the head, of a multibyte character.  */
835      if (!sb && !re_string_first_byte (&mctx.input, 0))
836	continue;
837#endif
838
839      /* It seems to be appropriate one, then use the matcher.  */
840      /* We assume that the matching starts from 0.  */
841      mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
842      match_last = check_matching (&mctx, fl_longest_match,
843				   range >= 0 ? &match_first : NULL);
844      if (match_last != -1)
845	{
846	  if (BE (match_last == -2, 0))
847	    {
848	      err = REG_ESPACE;
849	      goto free_return;
850	    }
851	  else
852	    {
853	      mctx.match_last = match_last;
854	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
855		{
856		  re_dfastate_t *pstate = mctx.state_log[match_last];
857		  mctx.last_node = check_halt_state_context (&mctx, pstate,
858							     match_last);
859		}
860	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
861		  || dfa->nbackref)
862		{
863		  err = prune_impossible_nodes (&mctx);
864		  if (err == REG_NOERROR)
865		    break;
866		  if (BE (err != REG_NOMATCH, 0))
867		    goto free_return;
868		  match_last = -1;
869		}
870	      else
871		break; /* We found a match.  */
872	    }
873	}
874
875      match_ctx_clean (&mctx);
876    }
877
878#ifdef DEBUG
879  assert (match_last != -1);
880  assert (err == REG_NOERROR);
881#endif
882
883  /* Set pmatch[] if we need.  */
884  if (nmatch > 0)
885    {
886      int reg_idx;
887
888      /* Initialize registers.  */
889      for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
890	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
891
892      /* Set the points where matching start/end.  */
893      pmatch[0].rm_so = 0;
894      pmatch[0].rm_eo = mctx.match_last;
895
896      if (!preg->no_sub && nmatch > 1)
897	{
898	  err = set_regs (preg, &mctx, nmatch, pmatch,
899			  dfa->has_plural_match && dfa->nbackref > 0);
900	  if (BE (err != REG_NOERROR, 0))
901	    goto free_return;
902	}
903
904      /* At last, add the offset to the each registers, since we slided
905	 the buffers so that we could assume that the matching starts
906	 from 0.  */
907      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
908	if (pmatch[reg_idx].rm_so != -1)
909	  {
910#ifdef RE_ENABLE_I18N
911	    if (BE (mctx.input.offsets_needed != 0, 0))
912	      {
913		pmatch[reg_idx].rm_so =
914		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
915		   ? mctx.input.valid_raw_len
916		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
917		pmatch[reg_idx].rm_eo =
918		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
919		   ? mctx.input.valid_raw_len
920		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
921	      }
922#else
923	    assert (mctx.input.offsets_needed == 0);
924#endif
925	    pmatch[reg_idx].rm_so += match_first;
926	    pmatch[reg_idx].rm_eo += match_first;
927	  }
928      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
929	{
930	  pmatch[nmatch + reg_idx].rm_so = -1;
931	  pmatch[nmatch + reg_idx].rm_eo = -1;
932	}
933
934      if (dfa->subexp_map)
935	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
936	  if (dfa->subexp_map[reg_idx] != reg_idx)
937	    {
938	      pmatch[reg_idx + 1].rm_so
939		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
940	      pmatch[reg_idx + 1].rm_eo
941		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
942	    }
943    }
944
945 free_return:
946  re_free (mctx.state_log);
947  if (dfa->nbackref)
948    match_ctx_free (&mctx);
949  re_string_destruct (&mctx.input);
950  return err;
951}
952
953static reg_errcode_t
954__attribute_warn_unused_result__
955prune_impossible_nodes (mctx)
956     re_match_context_t *mctx;
957{
958  const re_dfa_t *const dfa = mctx->dfa;
959  int halt_node, match_last;
960  reg_errcode_t ret;
961  re_dfastate_t **sifted_states;
962  re_dfastate_t **lim_states = NULL;
963  re_sift_context_t sctx;
964#ifdef DEBUG
965  assert (mctx->state_log != NULL);
966#endif
967  match_last = mctx->match_last;
968  halt_node = mctx->last_node;
969
970  /* Avoid overflow.  */
971  if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
972    return REG_ESPACE;
973
974  sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
975  if (BE (sifted_states == NULL, 0))
976    {
977      ret = REG_ESPACE;
978      goto free_return;
979    }
980  if (dfa->nbackref)
981    {
982      lim_states = re_malloc (re_dfastate_t *, match_last + 1);
983      if (BE (lim_states == NULL, 0))
984	{
985	  ret = REG_ESPACE;
986	  goto free_return;
987	}
988      while (1)
989	{
990	  memset (lim_states, '\0',
991		  sizeof (re_dfastate_t *) * (match_last + 1));
992	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
993			 match_last);
994	  ret = sift_states_backward (mctx, &sctx);
995	  re_node_set_free (&sctx.limits);
996	  if (BE (ret != REG_NOERROR, 0))
997	      goto free_return;
998	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
999	    break;
1000	  do
1001	    {
1002	      --match_last;
1003	      if (match_last < 0)
1004		{
1005		  ret = REG_NOMATCH;
1006		  goto free_return;
1007		}
1008	    } while (mctx->state_log[match_last] == NULL
1009		     || !mctx->state_log[match_last]->halt);
1010	  halt_node = check_halt_state_context (mctx,
1011						mctx->state_log[match_last],
1012						match_last);
1013	}
1014      ret = merge_state_array (dfa, sifted_states, lim_states,
1015			       match_last + 1);
1016      re_free (lim_states);
1017      lim_states = NULL;
1018      if (BE (ret != REG_NOERROR, 0))
1019	goto free_return;
1020    }
1021  else
1022    {
1023      sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1024      ret = sift_states_backward (mctx, &sctx);
1025      re_node_set_free (&sctx.limits);
1026      if (BE (ret != REG_NOERROR, 0))
1027	goto free_return;
1028      if (sifted_states[0] == NULL)
1029	{
1030	  ret = REG_NOMATCH;
1031	  goto free_return;
1032	}
1033    }
1034  re_free (mctx->state_log);
1035  mctx->state_log = sifted_states;
1036  sifted_states = NULL;
1037  mctx->last_node = halt_node;
1038  mctx->match_last = match_last;
1039  ret = REG_NOERROR;
1040 free_return:
1041  re_free (sifted_states);
1042  re_free (lim_states);
1043  return ret;
1044}
1045
1046/* Acquire an initial state and return it.
1047   We must select appropriate initial state depending on the context,
1048   since initial states may have constraints like "\<", "^", etc..  */
1049
1050static inline re_dfastate_t *
1051__attribute ((always_inline)) internal_function
1052acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1053			    int idx)
1054{
1055  const re_dfa_t *const dfa = mctx->dfa;
1056  if (dfa->init_state->has_constraint)
1057    {
1058      unsigned int context;
1059      context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1060      if (IS_WORD_CONTEXT (context))
1061	return dfa->init_state_word;
1062      else if (IS_ORDINARY_CONTEXT (context))
1063	return dfa->init_state;
1064      else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1065	return dfa->init_state_begbuf;
1066      else if (IS_NEWLINE_CONTEXT (context))
1067	return dfa->init_state_nl;
1068      else if (IS_BEGBUF_CONTEXT (context))
1069	{
1070	  /* It is relatively rare case, then calculate on demand.  */
1071	  return re_acquire_state_context (err, dfa,
1072					   dfa->init_state->entrance_nodes,
1073					   context);
1074	}
1075      else
1076	/* Must not happen?  */
1077	return dfa->init_state;
1078    }
1079  else
1080    return dfa->init_state;
1081}
1082
1083/* Check whether the regular expression match input string INPUT or not,
1084   and return the index where the matching end, return -1 if not match,
1085   or return -2 in case of an error.
1086   FL_LONGEST_MATCH means we want the POSIX longest matching.
1087   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1088   next place where we may want to try matching.
1089   Note that the matcher assume that the maching starts from the current
1090   index of the buffer.  */
1091
1092static int
1093internal_function __attribute_warn_unused_result__
1094check_matching (re_match_context_t *mctx, int fl_longest_match,
1095		int *p_match_first)
1096{
1097  const re_dfa_t *const dfa = mctx->dfa;
1098  reg_errcode_t err;
1099  int match = 0;
1100  int match_last = -1;
1101  int cur_str_idx = re_string_cur_idx (&mctx->input);
1102  re_dfastate_t *cur_state;
1103  int at_init_state = p_match_first != NULL;
1104  int next_start_idx = cur_str_idx;
1105
1106  err = REG_NOERROR;
1107  cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1108  /* An initial state must not be NULL (invalid).  */
1109  if (BE (cur_state == NULL, 0))
1110    {
1111      assert (err == REG_ESPACE);
1112      return -2;
1113    }
1114
1115  if (mctx->state_log != NULL)
1116    {
1117      mctx->state_log[cur_str_idx] = cur_state;
1118
1119      /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1120	 later.  E.g. Processing back references.  */
1121      if (BE (dfa->nbackref, 0))
1122	{
1123	  at_init_state = 0;
1124	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1125	  if (BE (err != REG_NOERROR, 0))
1126	    return err;
1127
1128	  if (cur_state->has_backref)
1129	    {
1130	      err = transit_state_bkref (mctx, &cur_state->nodes);
1131	      if (BE (err != REG_NOERROR, 0))
1132		return err;
1133	    }
1134	}
1135    }
1136
1137  /* If the RE accepts NULL string.  */
1138  if (BE (cur_state->halt, 0))
1139    {
1140      if (!cur_state->has_constraint
1141	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1142	{
1143	  if (!fl_longest_match)
1144	    return cur_str_idx;
1145	  else
1146	    {
1147	      match_last = cur_str_idx;
1148	      match = 1;
1149	    }
1150	}
1151    }
1152
1153  while (!re_string_eoi (&mctx->input))
1154    {
1155      re_dfastate_t *old_state = cur_state;
1156      int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1157
1158      if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1159	   && mctx->input.bufs_len < mctx->input.len)
1160	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1161	      && mctx->input.valid_len < mctx->input.len))
1162	{
1163	  err = extend_buffers (mctx, next_char_idx + 1);
1164	  if (BE (err != REG_NOERROR, 0))
1165	    {
1166	      assert (err == REG_ESPACE);
1167	      return -2;
1168	    }
1169	}
1170
1171      cur_state = transit_state (&err, mctx, cur_state);
1172      if (mctx->state_log != NULL)
1173	cur_state = merge_state_with_log (&err, mctx, cur_state);
1174
1175      if (cur_state == NULL)
1176	{
1177	  /* Reached the invalid state or an error.  Try to recover a valid
1178	     state using the state log, if available and if we have not
1179	     already found a valid (even if not the longest) match.  */
1180	  if (BE (err != REG_NOERROR, 0))
1181	    return -2;
1182
1183	  if (mctx->state_log == NULL
1184	      || (match && !fl_longest_match)
1185	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1186	    break;
1187	}
1188
1189      if (BE (at_init_state, 0))
1190	{
1191	  if (old_state == cur_state)
1192	    next_start_idx = next_char_idx;
1193	  else
1194	    at_init_state = 0;
1195	}
1196
1197      if (cur_state->halt)
1198	{
1199	  /* Reached a halt state.
1200	     Check the halt state can satisfy the current context.  */
1201	  if (!cur_state->has_constraint
1202	      || check_halt_state_context (mctx, cur_state,
1203					   re_string_cur_idx (&mctx->input)))
1204	    {
1205	      /* We found an appropriate halt state.  */
1206	      match_last = re_string_cur_idx (&mctx->input);
1207	      match = 1;
1208
1209	      /* We found a match, do not modify match_first below.  */
1210	      p_match_first = NULL;
1211	      if (!fl_longest_match)
1212		break;
1213	    }
1214	}
1215    }
1216
1217  if (p_match_first)
1218    *p_match_first += next_start_idx;
1219
1220  return match_last;
1221}
1222
1223/* Check NODE match the current context.  */
1224
1225static int
1226internal_function
1227check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)
1228{
1229  re_token_type_t type = dfa->nodes[node].type;
1230  unsigned int constraint = dfa->nodes[node].constraint;
1231  if (type != END_OF_RE)
1232    return 0;
1233  if (!constraint)
1234    return 1;
1235  if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1236    return 0;
1237  return 1;
1238}
1239
1240/* Check the halt state STATE match the current context.
1241   Return 0 if not match, if the node, STATE has, is a halt node and
1242   match the context, return the node.  */
1243
1244static int
1245internal_function
1246check_halt_state_context (const re_match_context_t *mctx,
1247			  const re_dfastate_t *state, int idx)
1248{
1249  int i;
1250  unsigned int context;
1251#ifdef DEBUG
1252  assert (state->halt);
1253#endif
1254  context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1255  for (i = 0; i < state->nodes.nelem; ++i)
1256    if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1257      return state->nodes.elems[i];
1258  return 0;
1259}
1260
1261/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1262   corresponding to the DFA).
1263   Return the destination node, and update EPS_VIA_NODES, return -1 in case
1264   of errors.  */
1265
1266static int
1267internal_function
1268proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,
1269		   int *pidx, int node, re_node_set *eps_via_nodes,
1270		   struct re_fail_stack_t *fs)
1271{
1272  const re_dfa_t *const dfa = mctx->dfa;
1273  int i, err;
1274  if (IS_EPSILON_NODE (dfa->nodes[node].type))
1275    {
1276      re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1277      re_node_set *edests = &dfa->edests[node];
1278      int dest_node;
1279      err = re_node_set_insert (eps_via_nodes, node);
1280      if (BE (err < 0, 0))
1281	return -2;
1282      /* Pick up a valid destination, or return -1 if none is found.  */
1283      for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1284	{
1285	  int candidate = edests->elems[i];
1286	  if (!re_node_set_contains (cur_nodes, candidate))
1287	    continue;
1288	  if (dest_node == -1)
1289	    dest_node = candidate;
1290
1291	  else
1292	    {
1293	      /* In order to avoid infinite loop like "(a*)*", return the second
1294		 epsilon-transition if the first was already considered.  */
1295	      if (re_node_set_contains (eps_via_nodes, dest_node))
1296		return candidate;
1297
1298	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1299	      else if (fs != NULL
1300		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1301					   eps_via_nodes))
1302		return -2;
1303
1304	      /* We know we are going to exit.  */
1305	      break;
1306	    }
1307	}
1308      return dest_node;
1309    }
1310  else
1311    {
1312      int naccepted = 0;
1313      re_token_type_t type = dfa->nodes[node].type;
1314
1315#ifdef RE_ENABLE_I18N
1316      if (dfa->nodes[node].accept_mb)
1317	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1318      else
1319#endif /* RE_ENABLE_I18N */
1320      if (type == OP_BACK_REF)
1321	{
1322	  int subexp_idx = dfa->nodes[node].opr.idx + 1;
1323	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1324	  if (fs != NULL)
1325	    {
1326	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1327		return -1;
1328	      else if (naccepted)
1329		{
1330		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1331		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1332			      naccepted) != 0)
1333		    return -1;
1334		}
1335	    }
1336
1337	  if (naccepted == 0)
1338	    {
1339	      int dest_node;
1340	      err = re_node_set_insert (eps_via_nodes, node);
1341	      if (BE (err < 0, 0))
1342		return -2;
1343	      dest_node = dfa->edests[node].elems[0];
1344	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1345					dest_node))
1346		return dest_node;
1347	    }
1348	}
1349
1350      if (naccepted != 0
1351	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1352	{
1353	  int dest_node = dfa->nexts[node];
1354	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1355	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1356		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1357					       dest_node)))
1358	    return -1;
1359	  re_node_set_empty (eps_via_nodes);
1360	  return dest_node;
1361	}
1362    }
1363  return -1;
1364}
1365
1366static reg_errcode_t
1367internal_function __attribute_warn_unused_result__
1368push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,
1369		 int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1370{
1371  reg_errcode_t err;
1372  int num = fs->num++;
1373  if (fs->num == fs->alloc)
1374    {
1375      struct re_fail_stack_ent_t *new_array;
1376      new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1377				       * fs->alloc * 2));
1378      if (new_array == NULL)
1379	return REG_ESPACE;
1380      fs->alloc *= 2;
1381      fs->stack = new_array;
1382    }
1383  fs->stack[num].idx = str_idx;
1384  fs->stack[num].node = dest_node;
1385  fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1386  if (fs->stack[num].regs == NULL)
1387    return REG_ESPACE;
1388  memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1389  err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1390  return err;
1391}
1392
1393static int
1394internal_function
1395pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
1396		regmatch_t *regs, re_node_set *eps_via_nodes)
1397{
1398  int num = --fs->num;
1399  assert (num >= 0);
1400  *pidx = fs->stack[num].idx;
1401  memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1402  re_node_set_free (eps_via_nodes);
1403  re_free (fs->stack[num].regs);
1404  *eps_via_nodes = fs->stack[num].eps_via_nodes;
1405  return fs->stack[num].node;
1406}
1407
1408/* Set the positions where the subexpressions are starts/ends to registers
1409   PMATCH.
1410   Note: We assume that pmatch[0] is already set, and
1411   pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1412
1413static reg_errcode_t
1414internal_function __attribute_warn_unused_result__
1415set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1416	  regmatch_t *pmatch, int fl_backtrack)
1417{
1418  const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
1419  int idx, cur_node;
1420  re_node_set eps_via_nodes;
1421  struct re_fail_stack_t *fs;
1422  struct re_fail_stack_t fs_body = { 0, 2, NULL };
1423  regmatch_t *prev_idx_match;
1424  int prev_idx_match_malloced = 0;
1425
1426#ifdef DEBUG
1427  assert (nmatch > 1);
1428  assert (mctx->state_log != NULL);
1429#endif
1430  if (fl_backtrack)
1431    {
1432      fs = &fs_body;
1433      fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1434      if (fs->stack == NULL)
1435	return REG_ESPACE;
1436    }
1437  else
1438    fs = NULL;
1439
1440  cur_node = dfa->init_node;
1441  re_node_set_init_empty (&eps_via_nodes);
1442
1443  if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1444    prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1445  else
1446    {
1447      prev_idx_match = re_malloc (regmatch_t, nmatch);
1448      if (prev_idx_match == NULL)
1449	{
1450	  free_fail_stack_return (fs);
1451	  return REG_ESPACE;
1452	}
1453      prev_idx_match_malloced = 1;
1454    }
1455  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1456
1457  for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1458    {
1459      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1460
1461      if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1462	{
1463	  int reg_idx;
1464	  if (fs)
1465	    {
1466	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1467		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1468		  break;
1469	      if (reg_idx == nmatch)
1470		{
1471		  re_node_set_free (&eps_via_nodes);
1472		  if (prev_idx_match_malloced)
1473		    re_free (prev_idx_match);
1474		  return free_fail_stack_return (fs);
1475		}
1476	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1477					 &eps_via_nodes);
1478	    }
1479	  else
1480	    {
1481	      re_node_set_free (&eps_via_nodes);
1482	      if (prev_idx_match_malloced)
1483		re_free (prev_idx_match);
1484	      return REG_NOERROR;
1485	    }
1486	}
1487
1488      /* Proceed to next node.  */
1489      cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1490				    &eps_via_nodes, fs);
1491
1492      if (BE (cur_node < 0, 0))
1493	{
1494	  if (BE (cur_node == -2, 0))
1495	    {
1496	      re_node_set_free (&eps_via_nodes);
1497	      if (prev_idx_match_malloced)
1498		re_free (prev_idx_match);
1499	      free_fail_stack_return (fs);
1500	      return REG_ESPACE;
1501	    }
1502	  if (fs)
1503	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1504				       &eps_via_nodes);
1505	  else
1506	    {
1507	      re_node_set_free (&eps_via_nodes);
1508	      if (prev_idx_match_malloced)
1509		re_free (prev_idx_match);
1510	      return REG_NOMATCH;
1511	    }
1512	}
1513    }
1514  re_node_set_free (&eps_via_nodes);
1515  if (prev_idx_match_malloced)
1516    re_free (prev_idx_match);
1517  return free_fail_stack_return (fs);
1518}
1519
1520static reg_errcode_t
1521internal_function
1522free_fail_stack_return (struct re_fail_stack_t *fs)
1523{
1524  if (fs)
1525    {
1526      int fs_idx;
1527      for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1528	{
1529	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1530	  re_free (fs->stack[fs_idx].regs);
1531	}
1532      re_free (fs->stack);
1533    }
1534  return REG_NOERROR;
1535}
1536
1537static void
1538internal_function
1539update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1540	     regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)
1541{
1542  int type = dfa->nodes[cur_node].type;
1543  if (type == OP_OPEN_SUBEXP)
1544    {
1545      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1546
1547      /* We are at the first node of this sub expression.  */
1548      if (reg_num < nmatch)
1549	{
1550	  pmatch[reg_num].rm_so = cur_idx;
1551	  pmatch[reg_num].rm_eo = -1;
1552	}
1553    }
1554  else if (type == OP_CLOSE_SUBEXP)
1555    {
1556      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
1557      if (reg_num < nmatch)
1558	{
1559	  /* We are at the last node of this sub expression.  */
1560	  if (pmatch[reg_num].rm_so < cur_idx)
1561	    {
1562	      pmatch[reg_num].rm_eo = cur_idx;
1563	      /* This is a non-empty match or we are not inside an optional
1564		 subexpression.  Accept this right away.  */
1565	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1566	    }
1567	  else
1568	    {
1569	      if (dfa->nodes[cur_node].opt_subexp
1570		  && prev_idx_match[reg_num].rm_so != -1)
1571		/* We transited through an empty match for an optional
1572		   subexpression, like (a?)*, and this is not the subexp's
1573		   first match.  Copy back the old content of the registers
1574		   so that matches of an inner subexpression are undone as
1575		   well, like in ((a?))*.  */
1576		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1577	      else
1578		/* We completed a subexpression, but it may be part of
1579		   an optional one, so do not update PREV_IDX_MATCH.  */
1580		pmatch[reg_num].rm_eo = cur_idx;
1581	    }
1582	}
1583    }
1584}
1585
1586/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1587   and sift the nodes in each states according to the following rules.
1588   Updated state_log will be wrote to STATE_LOG.
1589
1590   Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1591     1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1592	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1593	the LAST_NODE, we throw away the node `a'.
1594     2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1595	string `s' and transit to `b':
1596	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1597	   away the node `a'.
1598	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1599	    thrown away, we throw away the node `a'.
1600     3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1601	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1602	   node `a'.
1603	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1604	    we throw away the node `a'.  */
1605
1606#define STATE_NODE_CONTAINS(state,node) \
1607  ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1608
1609static reg_errcode_t
1610internal_function
1611sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1612{
1613  reg_errcode_t err;
1614  int null_cnt = 0;
1615  int str_idx = sctx->last_str_idx;
1616  re_node_set cur_dest;
1617
1618#ifdef DEBUG
1619  assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1620#endif
1621
1622  /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1623     transit to the last_node and the last_node itself.  */
1624  err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1625  if (BE (err != REG_NOERROR, 0))
1626    return err;
1627  err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1628  if (BE (err != REG_NOERROR, 0))
1629    goto free_return;
1630
1631  /* Then check each states in the state_log.  */
1632  while (str_idx > 0)
1633    {
1634      /* Update counters.  */
1635      null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1636      if (null_cnt > mctx->max_mb_elem_len)
1637	{
1638	  memset (sctx->sifted_states, '\0',
1639		  sizeof (re_dfastate_t *) * str_idx);
1640	  re_node_set_free (&cur_dest);
1641	  return REG_NOERROR;
1642	}
1643      re_node_set_empty (&cur_dest);
1644      --str_idx;
1645
1646      if (mctx->state_log[str_idx])
1647	{
1648	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1649	  if (BE (err != REG_NOERROR, 0))
1650	    goto free_return;
1651	}
1652
1653      /* Add all the nodes which satisfy the following conditions:
1654	 - It can epsilon transit to a node in CUR_DEST.
1655	 - It is in CUR_SRC.
1656	 And update state_log.  */
1657      err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1658      if (BE (err != REG_NOERROR, 0))
1659	goto free_return;
1660    }
1661  err = REG_NOERROR;
1662 free_return:
1663  re_node_set_free (&cur_dest);
1664  return err;
1665}
1666
1667static reg_errcode_t
1668internal_function __attribute_warn_unused_result__
1669build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1670		     int str_idx, re_node_set *cur_dest)
1671{
1672  const re_dfa_t *const dfa = mctx->dfa;
1673  const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1674  int i;
1675
1676  /* Then build the next sifted state.
1677     We build the next sifted state on `cur_dest', and update
1678     `sifted_states[str_idx]' with `cur_dest'.
1679     Note:
1680     `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1681     `cur_src' points the node_set of the old `state_log[str_idx]'
1682     (with the epsilon nodes pre-filtered out).  */
1683  for (i = 0; i < cur_src->nelem; i++)
1684    {
1685      int prev_node = cur_src->elems[i];
1686      int naccepted = 0;
1687      int ret;
1688
1689#ifdef DEBUG
1690      re_token_type_t type = dfa->nodes[prev_node].type;
1691      assert (!IS_EPSILON_NODE (type));
1692#endif
1693#ifdef RE_ENABLE_I18N
1694      /* If the node may accept `multi byte'.  */
1695      if (dfa->nodes[prev_node].accept_mb)
1696	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1697					 str_idx, sctx->last_str_idx);
1698#endif /* RE_ENABLE_I18N */
1699
1700      /* We don't check backreferences here.
1701	 See update_cur_sifted_state().  */
1702      if (!naccepted
1703	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1704	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1705				  dfa->nexts[prev_node]))
1706	naccepted = 1;
1707
1708      if (naccepted == 0)
1709	continue;
1710
1711      if (sctx->limits.nelem)
1712	{
1713	  int to_idx = str_idx + naccepted;
1714	  if (check_dst_limits (mctx, &sctx->limits,
1715				dfa->nexts[prev_node], to_idx,
1716				prev_node, str_idx))
1717	    continue;
1718	}
1719      ret = re_node_set_insert (cur_dest, prev_node);
1720      if (BE (ret == -1, 0))
1721	return REG_ESPACE;
1722    }
1723
1724  return REG_NOERROR;
1725}
1726
1727/* Helper functions.  */
1728
1729static reg_errcode_t
1730internal_function
1731clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
1732{
1733  int top = mctx->state_log_top;
1734
1735  if ((next_state_log_idx >= mctx->input.bufs_len
1736       && mctx->input.bufs_len < mctx->input.len)
1737      || (next_state_log_idx >= mctx->input.valid_len
1738	  && mctx->input.valid_len < mctx->input.len))
1739    {
1740      reg_errcode_t err;
1741      err = extend_buffers (mctx, next_state_log_idx + 1);
1742      if (BE (err != REG_NOERROR, 0))
1743	return err;
1744    }
1745
1746  if (top < next_state_log_idx)
1747    {
1748      memset (mctx->state_log + top + 1, '\0',
1749	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1750      mctx->state_log_top = next_state_log_idx;
1751    }
1752  return REG_NOERROR;
1753}
1754
1755static reg_errcode_t
1756internal_function
1757merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1758		   re_dfastate_t **src, int num)
1759{
1760  int st_idx;
1761  reg_errcode_t err;
1762  for (st_idx = 0; st_idx < num; ++st_idx)
1763    {
1764      if (dst[st_idx] == NULL)
1765	dst[st_idx] = src[st_idx];
1766      else if (src[st_idx] != NULL)
1767	{
1768	  re_node_set merged_set;
1769	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1770					&src[st_idx]->nodes);
1771	  if (BE (err != REG_NOERROR, 0))
1772	    return err;
1773	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1774	  re_node_set_free (&merged_set);
1775	  if (BE (err != REG_NOERROR, 0))
1776	    return err;
1777	}
1778    }
1779  return REG_NOERROR;
1780}
1781
1782static reg_errcode_t
1783internal_function
1784update_cur_sifted_state (const re_match_context_t *mctx,
1785			 re_sift_context_t *sctx, int str_idx,
1786			 re_node_set *dest_nodes)
1787{
1788  const re_dfa_t *const dfa = mctx->dfa;
1789  reg_errcode_t err = REG_NOERROR;
1790  const re_node_set *candidates;
1791  candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1792		: &mctx->state_log[str_idx]->nodes);
1793
1794  if (dest_nodes->nelem == 0)
1795    sctx->sifted_states[str_idx] = NULL;
1796  else
1797    {
1798      if (candidates)
1799	{
1800	  /* At first, add the nodes which can epsilon transit to a node in
1801	     DEST_NODE.  */
1802	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1803	  if (BE (err != REG_NOERROR, 0))
1804	    return err;
1805
1806	  /* Then, check the limitations in the current sift_context.  */
1807	  if (sctx->limits.nelem)
1808	    {
1809	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1810					 mctx->bkref_ents, str_idx);
1811	      if (BE (err != REG_NOERROR, 0))
1812		return err;
1813	    }
1814	}
1815
1816      sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1817      if (BE (err != REG_NOERROR, 0))
1818	return err;
1819    }
1820
1821  if (candidates && mctx->state_log[str_idx]->has_backref)
1822    {
1823      err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1824      if (BE (err != REG_NOERROR, 0))
1825	return err;
1826    }
1827  return REG_NOERROR;
1828}
1829
1830static reg_errcode_t
1831internal_function __attribute_warn_unused_result__
1832add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1833		       const re_node_set *candidates)
1834{
1835  reg_errcode_t err = REG_NOERROR;
1836  int i;
1837
1838  re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1839  if (BE (err != REG_NOERROR, 0))
1840    return err;
1841
1842  if (!state->inveclosure.alloc)
1843    {
1844      err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1845      if (BE (err != REG_NOERROR, 0))
1846	return REG_ESPACE;
1847      for (i = 0; i < dest_nodes->nelem; i++)
1848	{
1849	  err = re_node_set_merge (&state->inveclosure,
1850				   dfa->inveclosures + dest_nodes->elems[i]);
1851	  if (BE (err != REG_NOERROR, 0))
1852	    return REG_ESPACE;
1853	}
1854    }
1855  return re_node_set_add_intersect (dest_nodes, candidates,
1856				    &state->inveclosure);
1857}
1858
1859static reg_errcode_t
1860internal_function
1861sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,
1862		       const re_node_set *candidates)
1863{
1864    int ecl_idx;
1865    reg_errcode_t err;
1866    re_node_set *inv_eclosure = dfa->inveclosures + node;
1867    re_node_set except_nodes;
1868    re_node_set_init_empty (&except_nodes);
1869    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1870      {
1871	int cur_node = inv_eclosure->elems[ecl_idx];
1872	if (cur_node == node)
1873	  continue;
1874	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1875	  {
1876	    int edst1 = dfa->edests[cur_node].elems[0];
1877	    int edst2 = ((dfa->edests[cur_node].nelem > 1)
1878			 ? dfa->edests[cur_node].elems[1] : -1);
1879	    if ((!re_node_set_contains (inv_eclosure, edst1)
1880		 && re_node_set_contains (dest_nodes, edst1))
1881		|| (edst2 > 0
1882		    && !re_node_set_contains (inv_eclosure, edst2)
1883		    && re_node_set_contains (dest_nodes, edst2)))
1884	      {
1885		err = re_node_set_add_intersect (&except_nodes, candidates,
1886						 dfa->inveclosures + cur_node);
1887		if (BE (err != REG_NOERROR, 0))
1888		  {
1889		    re_node_set_free (&except_nodes);
1890		    return err;
1891		  }
1892	      }
1893	  }
1894      }
1895    for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1896      {
1897	int cur_node = inv_eclosure->elems[ecl_idx];
1898	if (!re_node_set_contains (&except_nodes, cur_node))
1899	  {
1900	    int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1901	    re_node_set_remove_at (dest_nodes, idx);
1902	  }
1903      }
1904    re_node_set_free (&except_nodes);
1905    return REG_NOERROR;
1906}
1907
1908static int
1909internal_function
1910check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,
1911		  int dst_node, int dst_idx, int src_node, int src_idx)
1912{
1913  const re_dfa_t *const dfa = mctx->dfa;
1914  int lim_idx, src_pos, dst_pos;
1915
1916  int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1917  int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1918  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1919    {
1920      int subexp_idx;
1921      struct re_backref_cache_entry *ent;
1922      ent = mctx->bkref_ents + limits->elems[lim_idx];
1923      subexp_idx = dfa->nodes[ent->node].opr.idx;
1924
1925      dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1926					   subexp_idx, dst_node, dst_idx,
1927					   dst_bkref_idx);
1928      src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1929					   subexp_idx, src_node, src_idx,
1930					   src_bkref_idx);
1931
1932      /* In case of:
1933	 <src> <dst> ( <subexp> )
1934	 ( <subexp> ) <src> <dst>
1935	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1936      if (src_pos == dst_pos)
1937	continue; /* This is unrelated limitation.  */
1938      else
1939	return 1;
1940    }
1941  return 0;
1942}
1943
1944static int
1945internal_function
1946check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1947			     int subexp_idx, int from_node, int bkref_idx)
1948{
1949  const re_dfa_t *const dfa = mctx->dfa;
1950  const re_node_set *eclosures = dfa->eclosures + from_node;
1951  int node_idx;
1952
1953  /* Else, we are on the boundary: examine the nodes on the epsilon
1954     closure.  */
1955  for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1956    {
1957      int node = eclosures->elems[node_idx];
1958      switch (dfa->nodes[node].type)
1959	{
1960	case OP_BACK_REF:
1961	  if (bkref_idx != -1)
1962	    {
1963	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1964	      do
1965		{
1966		  int dst, cpos;
1967
1968		  if (ent->node != node)
1969		    continue;
1970
1971		  if (subexp_idx < BITSET_WORD_BITS
1972		      && !(ent->eps_reachable_subexps_map
1973			   & ((bitset_word_t) 1 << subexp_idx)))
1974		    continue;
1975
1976		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1977		     OP_CLOSE_SUBEXP cases below.  But, if the
1978		     destination node is the same node as the source
1979		     node, don't recurse because it would cause an
1980		     infinite loop: a regex that exhibits this behavior
1981		     is ()\1*\1*  */
1982		  dst = dfa->edests[node].elems[0];
1983		  if (dst == from_node)
1984		    {
1985		      if (boundaries & 1)
1986			return -1;
1987		      else /* if (boundaries & 2) */
1988			return 0;
1989		    }
1990
1991		  cpos =
1992		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1993						 dst, bkref_idx);
1994		  if (cpos == -1 /* && (boundaries & 1) */)
1995		    return -1;
1996		  if (cpos == 0 && (boundaries & 2))
1997		    return 0;
1998
1999		  if (subexp_idx < BITSET_WORD_BITS)
2000		    ent->eps_reachable_subexps_map
2001		      &= ~((bitset_word_t) 1 << subexp_idx);
2002		}
2003	      while (ent++->more);
2004	    }
2005	  break;
2006
2007	case OP_OPEN_SUBEXP:
2008	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2009	    return -1;
2010	  break;
2011
2012	case OP_CLOSE_SUBEXP:
2013	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2014	    return 0;
2015	  break;
2016
2017	default:
2018	    break;
2019	}
2020    }
2021
2022  return (boundaries & 2) ? 1 : 0;
2023}
2024
2025static int
2026internal_function
2027check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,
2028			   int subexp_idx, int from_node, int str_idx,
2029			   int bkref_idx)
2030{
2031  struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2032  int boundaries;
2033
2034  /* If we are outside the range of the subexpression, return -1 or 1.  */
2035  if (str_idx < lim->subexp_from)
2036    return -1;
2037
2038  if (lim->subexp_to < str_idx)
2039    return 1;
2040
2041  /* If we are within the subexpression, return 0.  */
2042  boundaries = (str_idx == lim->subexp_from);
2043  boundaries |= (str_idx == lim->subexp_to) << 1;
2044  if (boundaries == 0)
2045    return 0;
2046
2047  /* Else, examine epsilon closure.  */
2048  return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2049				      from_node, bkref_idx);
2050}
2051
2052/* Check the limitations of sub expressions LIMITS, and remove the nodes
2053   which are against limitations from DEST_NODES. */
2054
2055static reg_errcode_t
2056internal_function
2057check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2058		     const re_node_set *candidates, re_node_set *limits,
2059		     struct re_backref_cache_entry *bkref_ents, int str_idx)
2060{
2061  reg_errcode_t err;
2062  int node_idx, lim_idx;
2063
2064  for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2065    {
2066      int subexp_idx;
2067      struct re_backref_cache_entry *ent;
2068      ent = bkref_ents + limits->elems[lim_idx];
2069
2070      if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2071	continue; /* This is unrelated limitation.  */
2072
2073      subexp_idx = dfa->nodes[ent->node].opr.idx;
2074      if (ent->subexp_to == str_idx)
2075	{
2076	  int ops_node = -1;
2077	  int cls_node = -1;
2078	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2079	    {
2080	      int node = dest_nodes->elems[node_idx];
2081	      re_token_type_t type = dfa->nodes[node].type;
2082	      if (type == OP_OPEN_SUBEXP
2083		  && subexp_idx == dfa->nodes[node].opr.idx)
2084		ops_node = node;
2085	      else if (type == OP_CLOSE_SUBEXP
2086		       && subexp_idx == dfa->nodes[node].opr.idx)
2087		cls_node = node;
2088	    }
2089
2090	  /* Check the limitation of the open subexpression.  */
2091	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2092	  if (ops_node >= 0)
2093	    {
2094	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2095					   candidates);
2096	      if (BE (err != REG_NOERROR, 0))
2097		return err;
2098	    }
2099
2100	  /* Check the limitation of the close subexpression.  */
2101	  if (cls_node >= 0)
2102	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2103	      {
2104		int node = dest_nodes->elems[node_idx];
2105		if (!re_node_set_contains (dfa->inveclosures + node,
2106					   cls_node)
2107		    && !re_node_set_contains (dfa->eclosures + node,
2108					      cls_node))
2109		  {
2110		    /* It is against this limitation.
2111		       Remove it form the current sifted state.  */
2112		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2113						 candidates);
2114		    if (BE (err != REG_NOERROR, 0))
2115		      return err;
2116		    --node_idx;
2117		  }
2118	      }
2119	}
2120      else /* (ent->subexp_to != str_idx)  */
2121	{
2122	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2123	    {
2124	      int node = dest_nodes->elems[node_idx];
2125	      re_token_type_t type = dfa->nodes[node].type;
2126	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2127		{
2128		  if (subexp_idx != dfa->nodes[node].opr.idx)
2129		    continue;
2130		  /* It is against this limitation.
2131		     Remove it form the current sifted state.  */
2132		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2133					       candidates);
2134		  if (BE (err != REG_NOERROR, 0))
2135		    return err;
2136		}
2137	    }
2138	}
2139    }
2140  return REG_NOERROR;
2141}
2142
2143static reg_errcode_t
2144internal_function __attribute_warn_unused_result__
2145sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2146		   int str_idx, const re_node_set *candidates)
2147{
2148  const re_dfa_t *const dfa = mctx->dfa;
2149  reg_errcode_t err;
2150  int node_idx, node;
2151  re_sift_context_t local_sctx;
2152  int first_idx = search_cur_bkref_entry (mctx, str_idx);
2153
2154  if (first_idx == -1)
2155    return REG_NOERROR;
2156
2157  local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2158
2159  for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2160    {
2161      int enabled_idx;
2162      re_token_type_t type;
2163      struct re_backref_cache_entry *entry;
2164      node = candidates->elems[node_idx];
2165      type = dfa->nodes[node].type;
2166      /* Avoid infinite loop for the REs like "()\1+".  */
2167      if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2168	continue;
2169      if (type != OP_BACK_REF)
2170	continue;
2171
2172      entry = mctx->bkref_ents + first_idx;
2173      enabled_idx = first_idx;
2174      do
2175	{
2176	  int subexp_len;
2177	  int to_idx;
2178	  int dst_node;
2179	  int ret;
2180	  re_dfastate_t *cur_state;
2181
2182	  if (entry->node != node)
2183	    continue;
2184	  subexp_len = entry->subexp_to - entry->subexp_from;
2185	  to_idx = str_idx + subexp_len;
2186	  dst_node = (subexp_len ? dfa->nexts[node]
2187		      : dfa->edests[node].elems[0]);
2188
2189	  if (to_idx > sctx->last_str_idx
2190	      || sctx->sifted_states[to_idx] == NULL
2191	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2192	      || check_dst_limits (mctx, &sctx->limits, node,
2193				   str_idx, dst_node, to_idx))
2194	    continue;
2195
2196	  if (local_sctx.sifted_states == NULL)
2197	    {
2198	      local_sctx = *sctx;
2199	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2200	      if (BE (err != REG_NOERROR, 0))
2201		goto free_return;
2202	    }
2203	  local_sctx.last_node = node;
2204	  local_sctx.last_str_idx = str_idx;
2205	  ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
2206	  if (BE (ret < 0, 0))
2207	    {
2208	      err = REG_ESPACE;
2209	      goto free_return;
2210	    }
2211	  cur_state = local_sctx.sifted_states[str_idx];
2212	  err = sift_states_backward (mctx, &local_sctx);
2213	  if (BE (err != REG_NOERROR, 0))
2214	    goto free_return;
2215	  if (sctx->limited_states != NULL)
2216	    {
2217	      err = merge_state_array (dfa, sctx->limited_states,
2218				       local_sctx.sifted_states,
2219				       str_idx + 1);
2220	      if (BE (err != REG_NOERROR, 0))
2221		goto free_return;
2222	    }
2223	  local_sctx.sifted_states[str_idx] = cur_state;
2224	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2225
2226	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2227	  entry = mctx->bkref_ents + enabled_idx;
2228	}
2229      while (enabled_idx++, entry++->more);
2230    }
2231  err = REG_NOERROR;
2232 free_return:
2233  if (local_sctx.sifted_states != NULL)
2234    {
2235      re_node_set_free (&local_sctx.limits);
2236    }
2237
2238  return err;
2239}
2240
2241
2242#ifdef RE_ENABLE_I18N
2243static int
2244internal_function
2245sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2246		     int node_idx, int str_idx, int max_str_idx)
2247{
2248  const re_dfa_t *const dfa = mctx->dfa;
2249  int naccepted;
2250  /* Check the node can accept `multi byte'.  */
2251  naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2252  if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2253      !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2254			    dfa->nexts[node_idx]))
2255    /* The node can't accept the `multi byte', or the
2256       destination was already thrown away, then the node
2257       could't accept the current input `multi byte'.   */
2258    naccepted = 0;
2259  /* Otherwise, it is sure that the node could accept
2260     `naccepted' bytes input.  */
2261  return naccepted;
2262}
2263#endif /* RE_ENABLE_I18N */
2264
2265
2266/* Functions for state transition.  */
2267
2268/* Return the next state to which the current state STATE will transit by
2269   accepting the current input byte, and update STATE_LOG if necessary.
2270   If STATE can accept a multibyte char/collating element/back reference
2271   update the destination of STATE_LOG.  */
2272
2273static re_dfastate_t *
2274internal_function __attribute_warn_unused_result__
2275transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2276	       re_dfastate_t *state)
2277{
2278  re_dfastate_t **trtable;
2279  unsigned char ch;
2280
2281#ifdef RE_ENABLE_I18N
2282  /* If the current state can accept multibyte.  */
2283  if (BE (state->accept_mb, 0))
2284    {
2285      *err = transit_state_mb (mctx, state);
2286      if (BE (*err != REG_NOERROR, 0))
2287	return NULL;
2288    }
2289#endif /* RE_ENABLE_I18N */
2290
2291  /* Then decide the next state with the single byte.  */
2292#if 0
2293  if (0)
2294    /* don't use transition table  */
2295    return transit_state_sb (err, mctx, state);
2296#endif
2297
2298  /* Use transition table  */
2299  ch = re_string_fetch_byte (&mctx->input);
2300  for (;;)
2301    {
2302      trtable = state->trtable;
2303      if (BE (trtable != NULL, 1))
2304	return trtable[ch];
2305
2306      trtable = state->word_trtable;
2307      if (BE (trtable != NULL, 1))
2308	{
2309	  unsigned int context;
2310	  context
2311	    = re_string_context_at (&mctx->input,
2312				    re_string_cur_idx (&mctx->input) - 1,
2313				    mctx->eflags);
2314	  if (IS_WORD_CONTEXT (context))
2315	    return trtable[ch + SBC_MAX];
2316	  else
2317	    return trtable[ch];
2318	}
2319
2320      if (!build_trtable (mctx->dfa, state))
2321	{
2322	  *err = REG_ESPACE;
2323	  return NULL;
2324	}
2325
2326      /* Retry, we now have a transition table.  */
2327    }
2328}
2329
2330/* Update the state_log if we need */
2331re_dfastate_t *
2332internal_function
2333merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2334		      re_dfastate_t *next_state)
2335{
2336  const re_dfa_t *const dfa = mctx->dfa;
2337  int cur_idx = re_string_cur_idx (&mctx->input);
2338
2339  if (cur_idx > mctx->state_log_top)
2340    {
2341      mctx->state_log[cur_idx] = next_state;
2342      mctx->state_log_top = cur_idx;
2343    }
2344  else if (mctx->state_log[cur_idx] == 0)
2345    {
2346      mctx->state_log[cur_idx] = next_state;
2347    }
2348  else
2349    {
2350      re_dfastate_t *pstate;
2351      unsigned int context;
2352      re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2353      /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2354	 the destination of a multibyte char/collating element/
2355	 back reference.  Then the next state is the union set of
2356	 these destinations and the results of the transition table.  */
2357      pstate = mctx->state_log[cur_idx];
2358      log_nodes = pstate->entrance_nodes;
2359      if (next_state != NULL)
2360	{
2361	  table_nodes = next_state->entrance_nodes;
2362	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2363					     log_nodes);
2364	  if (BE (*err != REG_NOERROR, 0))
2365	    return NULL;
2366	}
2367      else
2368	next_nodes = *log_nodes;
2369      /* Note: We already add the nodes of the initial state,
2370	 then we don't need to add them here.  */
2371
2372      context = re_string_context_at (&mctx->input,
2373				      re_string_cur_idx (&mctx->input) - 1,
2374				      mctx->eflags);
2375      next_state = mctx->state_log[cur_idx]
2376	= re_acquire_state_context (err, dfa, &next_nodes, context);
2377      /* We don't need to check errors here, since the return value of
2378	 this function is next_state and ERR is already set.  */
2379
2380      if (table_nodes != NULL)
2381	re_node_set_free (&next_nodes);
2382    }
2383
2384  if (BE (dfa->nbackref, 0) && next_state != NULL)
2385    {
2386      /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2387	 later.  We must check them here, since the back references in the
2388	 next state might use them.  */
2389      *err = check_subexp_matching_top (mctx, &next_state->nodes,
2390					cur_idx);
2391      if (BE (*err != REG_NOERROR, 0))
2392	return NULL;
2393
2394      /* If the next state has back references.  */
2395      if (next_state->has_backref)
2396	{
2397	  *err = transit_state_bkref (mctx, &next_state->nodes);
2398	  if (BE (*err != REG_NOERROR, 0))
2399	    return NULL;
2400	  next_state = mctx->state_log[cur_idx];
2401	}
2402    }
2403
2404  return next_state;
2405}
2406
2407/* Skip bytes in the input that correspond to part of a
2408   multi-byte match, then look in the log for a state
2409   from which to restart matching.  */
2410re_dfastate_t *
2411internal_function
2412find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2413{
2414  re_dfastate_t *cur_state;
2415  do
2416    {
2417      int max = mctx->state_log_top;
2418      int cur_str_idx = re_string_cur_idx (&mctx->input);
2419
2420      do
2421	{
2422	  if (++cur_str_idx > max)
2423	    return NULL;
2424	  re_string_skip_bytes (&mctx->input, 1);
2425	}
2426      while (mctx->state_log[cur_str_idx] == NULL);
2427
2428      cur_state = merge_state_with_log (err, mctx, NULL);
2429    }
2430  while (*err == REG_NOERROR && cur_state == NULL);
2431  return cur_state;
2432}
2433
2434/* Helper functions for transit_state.  */
2435
2436/* From the node set CUR_NODES, pick up the nodes whose types are
2437   OP_OPEN_SUBEXP and which have corresponding back references in the regular
2438   expression. And register them to use them later for evaluating the
2439   correspoding back references.  */
2440
2441static reg_errcode_t
2442internal_function
2443check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2444			   int str_idx)
2445{
2446  const re_dfa_t *const dfa = mctx->dfa;
2447  int node_idx;
2448  reg_errcode_t err;
2449
2450  /* TODO: This isn't efficient.
2451	   Because there might be more than one nodes whose types are
2452	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2453	   nodes.
2454	   E.g. RE: (a){2}  */
2455  for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2456    {
2457      int node = cur_nodes->elems[node_idx];
2458      if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2459	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2460	  && (dfa->used_bkref_map
2461	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2462	{
2463	  err = match_ctx_add_subtop (mctx, node, str_idx);
2464	  if (BE (err != REG_NOERROR, 0))
2465	    return err;
2466	}
2467    }
2468  return REG_NOERROR;
2469}
2470
2471#if 0
2472/* Return the next state to which the current state STATE will transit by
2473   accepting the current input byte.  */
2474
2475static re_dfastate_t *
2476transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2477		  re_dfastate_t *state)
2478{
2479  const re_dfa_t *const dfa = mctx->dfa;
2480  re_node_set next_nodes;
2481  re_dfastate_t *next_state;
2482  int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2483  unsigned int context;
2484
2485  *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2486  if (BE (*err != REG_NOERROR, 0))
2487    return NULL;
2488  for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2489    {
2490      int cur_node = state->nodes.elems[node_cnt];
2491      if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2492	{
2493	  *err = re_node_set_merge (&next_nodes,
2494				    dfa->eclosures + dfa->nexts[cur_node]);
2495	  if (BE (*err != REG_NOERROR, 0))
2496	    {
2497	      re_node_set_free (&next_nodes);
2498	      return NULL;
2499	    }
2500	}
2501    }
2502  context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2503  next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2504  /* We don't need to check errors here, since the return value of
2505     this function is next_state and ERR is already set.  */
2506
2507  re_node_set_free (&next_nodes);
2508  re_string_skip_bytes (&mctx->input, 1);
2509  return next_state;
2510}
2511#endif
2512
2513#ifdef RE_ENABLE_I18N
2514static reg_errcode_t
2515internal_function
2516transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2517{
2518  const re_dfa_t *const dfa = mctx->dfa;
2519  reg_errcode_t err;
2520  int i;
2521
2522  for (i = 0; i < pstate->nodes.nelem; ++i)
2523    {
2524      re_node_set dest_nodes, *new_nodes;
2525      int cur_node_idx = pstate->nodes.elems[i];
2526      int naccepted, dest_idx;
2527      unsigned int context;
2528      re_dfastate_t *dest_state;
2529
2530      if (!dfa->nodes[cur_node_idx].accept_mb)
2531	continue;
2532
2533      if (dfa->nodes[cur_node_idx].constraint)
2534	{
2535	  context = re_string_context_at (&mctx->input,
2536					  re_string_cur_idx (&mctx->input),
2537					  mctx->eflags);
2538	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2539					   context))
2540	    continue;
2541	}
2542
2543      /* How many bytes the node can accept?  */
2544      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2545					   re_string_cur_idx (&mctx->input));
2546      if (naccepted == 0)
2547	continue;
2548
2549      /* The node can accepts `naccepted' bytes.  */
2550      dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2551      mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2552			       : mctx->max_mb_elem_len);
2553      err = clean_state_log_if_needed (mctx, dest_idx);
2554      if (BE (err != REG_NOERROR, 0))
2555	return err;
2556#ifdef DEBUG
2557      assert (dfa->nexts[cur_node_idx] != -1);
2558#endif
2559      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2560
2561      dest_state = mctx->state_log[dest_idx];
2562      if (dest_state == NULL)
2563	dest_nodes = *new_nodes;
2564      else
2565	{
2566	  err = re_node_set_init_union (&dest_nodes,
2567					dest_state->entrance_nodes, new_nodes);
2568	  if (BE (err != REG_NOERROR, 0))
2569	    return err;
2570	}
2571      context = re_string_context_at (&mctx->input, dest_idx - 1,
2572				      mctx->eflags);
2573      mctx->state_log[dest_idx]
2574	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2575      if (dest_state != NULL)
2576	re_node_set_free (&dest_nodes);
2577      if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2578	return err;
2579    }
2580  return REG_NOERROR;
2581}
2582#endif /* RE_ENABLE_I18N */
2583
2584static reg_errcode_t
2585internal_function
2586transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2587{
2588  const re_dfa_t *const dfa = mctx->dfa;
2589  reg_errcode_t err;
2590  int i;
2591  int cur_str_idx = re_string_cur_idx (&mctx->input);
2592
2593  for (i = 0; i < nodes->nelem; ++i)
2594    {
2595      int dest_str_idx, prev_nelem, bkc_idx;
2596      int node_idx = nodes->elems[i];
2597      unsigned int context;
2598      const re_token_t *node = dfa->nodes + node_idx;
2599      re_node_set *new_dest_nodes;
2600
2601      /* Check whether `node' is a backreference or not.  */
2602      if (node->type != OP_BACK_REF)
2603	continue;
2604
2605      if (node->constraint)
2606	{
2607	  context = re_string_context_at (&mctx->input, cur_str_idx,
2608					  mctx->eflags);
2609	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2610	    continue;
2611	}
2612
2613      /* `node' is a backreference.
2614	 Check the substring which the substring matched.  */
2615      bkc_idx = mctx->nbkref_ents;
2616      err = get_subexp (mctx, node_idx, cur_str_idx);
2617      if (BE (err != REG_NOERROR, 0))
2618	goto free_return;
2619
2620      /* And add the epsilon closures (which is `new_dest_nodes') of
2621	 the backreference to appropriate state_log.  */
2622#ifdef DEBUG
2623      assert (dfa->nexts[node_idx] != -1);
2624#endif
2625      for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2626	{
2627	  int subexp_len;
2628	  re_dfastate_t *dest_state;
2629	  struct re_backref_cache_entry *bkref_ent;
2630	  bkref_ent = mctx->bkref_ents + bkc_idx;
2631	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2632	    continue;
2633	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2634	  new_dest_nodes = (subexp_len == 0
2635			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2636			    : dfa->eclosures + dfa->nexts[node_idx]);
2637	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2638			  - bkref_ent->subexp_from);
2639	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2640					  mctx->eflags);
2641	  dest_state = mctx->state_log[dest_str_idx];
2642	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2643			: mctx->state_log[cur_str_idx]->nodes.nelem);
2644	  /* Add `new_dest_node' to state_log.  */
2645	  if (dest_state == NULL)
2646	    {
2647	      mctx->state_log[dest_str_idx]
2648		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2649					    context);
2650	      if (BE (mctx->state_log[dest_str_idx] == NULL
2651		      && err != REG_NOERROR, 0))
2652		goto free_return;
2653	    }
2654	  else
2655	    {
2656	      re_node_set dest_nodes;
2657	      err = re_node_set_init_union (&dest_nodes,
2658					    dest_state->entrance_nodes,
2659					    new_dest_nodes);
2660	      if (BE (err != REG_NOERROR, 0))
2661		{
2662		  re_node_set_free (&dest_nodes);
2663		  goto free_return;
2664		}
2665	      mctx->state_log[dest_str_idx]
2666		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2667	      re_node_set_free (&dest_nodes);
2668	      if (BE (mctx->state_log[dest_str_idx] == NULL
2669		      && err != REG_NOERROR, 0))
2670		goto free_return;
2671	    }
2672	  /* We need to check recursively if the backreference can epsilon
2673	     transit.  */
2674	  if (subexp_len == 0
2675	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2676	    {
2677	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2678					       cur_str_idx);
2679	      if (BE (err != REG_NOERROR, 0))
2680		goto free_return;
2681	      err = transit_state_bkref (mctx, new_dest_nodes);
2682	      if (BE (err != REG_NOERROR, 0))
2683		goto free_return;
2684	    }
2685	}
2686    }
2687  err = REG_NOERROR;
2688 free_return:
2689  return err;
2690}
2691
2692/* Enumerate all the candidates which the backreference BKREF_NODE can match
2693   at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2694   Note that we might collect inappropriate candidates here.
2695   However, the cost of checking them strictly here is too high, then we
2696   delay these checking for prune_impossible_nodes().  */
2697
2698static reg_errcode_t
2699internal_function __attribute_warn_unused_result__
2700get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
2701{
2702  const re_dfa_t *const dfa = mctx->dfa;
2703  int subexp_num, sub_top_idx;
2704  const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2705  /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2706  int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2707  if (cache_idx != -1)
2708    {
2709      const struct re_backref_cache_entry *entry
2710	= mctx->bkref_ents + cache_idx;
2711      do
2712	if (entry->node == bkref_node)
2713	  return REG_NOERROR; /* We already checked it.  */
2714      while (entry++->more);
2715    }
2716
2717  subexp_num = dfa->nodes[bkref_node].opr.idx;
2718
2719  /* For each sub expression  */
2720  for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2721    {
2722      reg_errcode_t err;
2723      re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2724      re_sub_match_last_t *sub_last;
2725      int sub_last_idx, sl_str, bkref_str_off;
2726
2727      if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2728	continue; /* It isn't related.  */
2729
2730      sl_str = sub_top->str_idx;
2731      bkref_str_off = bkref_str_idx;
2732      /* At first, check the last node of sub expressions we already
2733	 evaluated.  */
2734      for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2735	{
2736	  int sl_str_diff;
2737	  sub_last = sub_top->lasts[sub_last_idx];
2738	  sl_str_diff = sub_last->str_idx - sl_str;
2739	  /* The matched string by the sub expression match with the substring
2740	     at the back reference?  */
2741	  if (sl_str_diff > 0)
2742	    {
2743	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2744		{
2745		  /* Not enough chars for a successful match.  */
2746		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2747		    break;
2748
2749		  err = clean_state_log_if_needed (mctx,
2750						   bkref_str_off
2751						   + sl_str_diff);
2752		  if (BE (err != REG_NOERROR, 0))
2753		    return err;
2754		  buf = (const char *) re_string_get_buffer (&mctx->input);
2755		}
2756	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2757		/* We don't need to search this sub expression any more.  */
2758		break;
2759	    }
2760	  bkref_str_off += sl_str_diff;
2761	  sl_str += sl_str_diff;
2762	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2763				bkref_str_idx);
2764
2765	  /* Reload buf, since the preceding call might have reallocated
2766	     the buffer.  */
2767	  buf = (const char *) re_string_get_buffer (&mctx->input);
2768
2769	  if (err == REG_NOMATCH)
2770	    continue;
2771	  if (BE (err != REG_NOERROR, 0))
2772	    return err;
2773	}
2774
2775      if (sub_last_idx < sub_top->nlasts)
2776	continue;
2777      if (sub_last_idx > 0)
2778	++sl_str;
2779      /* Then, search for the other last nodes of the sub expression.  */
2780      for (; sl_str <= bkref_str_idx; ++sl_str)
2781	{
2782	  int cls_node, sl_str_off;
2783	  const re_node_set *nodes;
2784	  sl_str_off = sl_str - sub_top->str_idx;
2785	  /* The matched string by the sub expression match with the substring
2786	     at the back reference?  */
2787	  if (sl_str_off > 0)
2788	    {
2789	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2790		{
2791		  /* If we are at the end of the input, we cannot match.  */
2792		  if (bkref_str_off >= mctx->input.len)
2793		    break;
2794
2795		  err = extend_buffers (mctx, bkref_str_off + 1);
2796		  if (BE (err != REG_NOERROR, 0))
2797		    return err;
2798
2799		  buf = (const char *) re_string_get_buffer (&mctx->input);
2800		}
2801	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2802		break; /* We don't need to search this sub expression
2803			  any more.  */
2804	    }
2805	  if (mctx->state_log[sl_str] == NULL)
2806	    continue;
2807	  /* Does this state have a ')' of the sub expression?  */
2808	  nodes = &mctx->state_log[sl_str]->nodes;
2809	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2810				       OP_CLOSE_SUBEXP);
2811	  if (cls_node == -1)
2812	    continue; /* No.  */
2813	  if (sub_top->path == NULL)
2814	    {
2815	      sub_top->path = calloc (sizeof (state_array_t),
2816				      sl_str - sub_top->str_idx + 1);
2817	      if (sub_top->path == NULL)
2818		return REG_ESPACE;
2819	    }
2820	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2821	     in the current context?  */
2822	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2823			       sub_top->str_idx, cls_node, sl_str,
2824			       OP_CLOSE_SUBEXP);
2825	  if (err == REG_NOMATCH)
2826	      continue;
2827	  if (BE (err != REG_NOERROR, 0))
2828	      return err;
2829	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2830	  if (BE (sub_last == NULL, 0))
2831	    return REG_ESPACE;
2832	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2833				bkref_str_idx);
2834	  if (err == REG_NOMATCH)
2835	    continue;
2836	}
2837    }
2838  return REG_NOERROR;
2839}
2840
2841/* Helper functions for get_subexp().  */
2842
2843/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2844   If it can arrive, register the sub expression expressed with SUB_TOP
2845   and SUB_LAST.  */
2846
2847static reg_errcode_t
2848internal_function
2849get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2850		re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)
2851{
2852  reg_errcode_t err;
2853  int to_idx;
2854  /* Can the subexpression arrive the back reference?  */
2855  err = check_arrival (mctx, &sub_last->path, sub_last->node,
2856		       sub_last->str_idx, bkref_node, bkref_str,
2857		       OP_OPEN_SUBEXP);
2858  if (err != REG_NOERROR)
2859    return err;
2860  err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2861			     sub_last->str_idx);
2862  if (BE (err != REG_NOERROR, 0))
2863    return err;
2864  to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2865  return clean_state_log_if_needed (mctx, to_idx);
2866}
2867
2868/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2869   Search '(' if FL_OPEN, or search ')' otherwise.
2870   TODO: This function isn't efficient...
2871	 Because there might be more than one nodes whose types are
2872	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2873	 nodes.
2874	 E.g. RE: (a){2}  */
2875
2876static int
2877internal_function
2878find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2879		  int subexp_idx, int type)
2880{
2881  int cls_idx;
2882  for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2883    {
2884      int cls_node = nodes->elems[cls_idx];
2885      const re_token_t *node = dfa->nodes + cls_node;
2886      if (node->type == type
2887	  && node->opr.idx == subexp_idx)
2888	return cls_node;
2889    }
2890  return -1;
2891}
2892
2893/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2894   LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2895   heavily reused.
2896   Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2897
2898static reg_errcode_t
2899internal_function __attribute_warn_unused_result__
2900check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,
2901	       int top_str, int last_node, int last_str, int type)
2902{
2903  const re_dfa_t *const dfa = mctx->dfa;
2904  reg_errcode_t err = REG_NOERROR;
2905  int subexp_num, backup_cur_idx, str_idx, null_cnt;
2906  re_dfastate_t *cur_state = NULL;
2907  re_node_set *cur_nodes, next_nodes;
2908  re_dfastate_t **backup_state_log;
2909  unsigned int context;
2910
2911  subexp_num = dfa->nodes[top_node].opr.idx;
2912  /* Extend the buffer if we need.  */
2913  if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2914    {
2915      re_dfastate_t **new_array;
2916      int old_alloc = path->alloc;
2917      path->alloc += last_str + mctx->max_mb_elem_len + 1;
2918      new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);
2919      if (BE (new_array == NULL, 0))
2920	{
2921	  path->alloc = old_alloc;
2922	  return REG_ESPACE;
2923	}
2924      path->array = new_array;
2925      memset (new_array + old_alloc, '\0',
2926	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2927    }
2928
2929  str_idx = path->next_idx ?: top_str;
2930
2931  /* Temporary modify MCTX.  */
2932  backup_state_log = mctx->state_log;
2933  backup_cur_idx = mctx->input.cur_idx;
2934  mctx->state_log = path->array;
2935  mctx->input.cur_idx = str_idx;
2936
2937  /* Setup initial node set.  */
2938  context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2939  if (str_idx == top_str)
2940    {
2941      err = re_node_set_init_1 (&next_nodes, top_node);
2942      if (BE (err != REG_NOERROR, 0))
2943	return err;
2944      err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2945      if (BE (err != REG_NOERROR, 0))
2946	{
2947	  re_node_set_free (&next_nodes);
2948	  return err;
2949	}
2950    }
2951  else
2952    {
2953      cur_state = mctx->state_log[str_idx];
2954      if (cur_state && cur_state->has_backref)
2955	{
2956	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2957	  if (BE (err != REG_NOERROR, 0))
2958	    return err;
2959	}
2960      else
2961	re_node_set_init_empty (&next_nodes);
2962    }
2963  if (str_idx == top_str || (cur_state && cur_state->has_backref))
2964    {
2965      if (next_nodes.nelem)
2966	{
2967	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2968				    subexp_num, type);
2969	  if (BE (err != REG_NOERROR, 0))
2970	    {
2971	      re_node_set_free (&next_nodes);
2972	      return err;
2973	    }
2974	}
2975      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2976      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2977	{
2978	  re_node_set_free (&next_nodes);
2979	  return err;
2980	}
2981      mctx->state_log[str_idx] = cur_state;
2982    }
2983
2984  for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2985    {
2986      re_node_set_empty (&next_nodes);
2987      if (mctx->state_log[str_idx + 1])
2988	{
2989	  err = re_node_set_merge (&next_nodes,
2990				   &mctx->state_log[str_idx + 1]->nodes);
2991	  if (BE (err != REG_NOERROR, 0))
2992	    {
2993	      re_node_set_free (&next_nodes);
2994	      return err;
2995	    }
2996	}
2997      if (cur_state)
2998	{
2999	  err = check_arrival_add_next_nodes (mctx, str_idx,
3000					      &cur_state->non_eps_nodes,
3001					      &next_nodes);
3002	  if (BE (err != REG_NOERROR, 0))
3003	    {
3004	      re_node_set_free (&next_nodes);
3005	      return err;
3006	    }
3007	}
3008      ++str_idx;
3009      if (next_nodes.nelem)
3010	{
3011	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3012	  if (BE (err != REG_NOERROR, 0))
3013	    {
3014	      re_node_set_free (&next_nodes);
3015	      return err;
3016	    }
3017	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3018				    subexp_num, type);
3019	  if (BE (err != REG_NOERROR, 0))
3020	    {
3021	      re_node_set_free (&next_nodes);
3022	      return err;
3023	    }
3024	}
3025      context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3026      cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3027      if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3028	{
3029	  re_node_set_free (&next_nodes);
3030	  return err;
3031	}
3032      mctx->state_log[str_idx] = cur_state;
3033      null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3034    }
3035  re_node_set_free (&next_nodes);
3036  cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3037	       : &mctx->state_log[last_str]->nodes);
3038  path->next_idx = str_idx;
3039
3040  /* Fix MCTX.  */
3041  mctx->state_log = backup_state_log;
3042  mctx->input.cur_idx = backup_cur_idx;
3043
3044  /* Then check the current node set has the node LAST_NODE.  */
3045  if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3046    return REG_NOERROR;
3047
3048  return REG_NOMATCH;
3049}
3050
3051/* Helper functions for check_arrival.  */
3052
3053/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3054   to NEXT_NODES.
3055   TODO: This function is similar to the functions transit_state*(),
3056	 however this function has many additional works.
3057	 Can't we unify them?  */
3058
3059static reg_errcode_t
3060internal_function __attribute_warn_unused_result__
3061check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,
3062			      re_node_set *cur_nodes, re_node_set *next_nodes)
3063{
3064  const re_dfa_t *const dfa = mctx->dfa;
3065  int result;
3066  int cur_idx;
3067  reg_errcode_t err = REG_NOERROR;
3068  re_node_set union_set;
3069  re_node_set_init_empty (&union_set);
3070  for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3071    {
3072      int naccepted = 0;
3073      int cur_node = cur_nodes->elems[cur_idx];
3074#ifdef DEBUG
3075      re_token_type_t type = dfa->nodes[cur_node].type;
3076      assert (!IS_EPSILON_NODE (type));
3077#endif
3078#ifdef RE_ENABLE_I18N
3079      /* If the node may accept `multi byte'.  */
3080      if (dfa->nodes[cur_node].accept_mb)
3081	{
3082	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3083					       str_idx);
3084	  if (naccepted > 1)
3085	    {
3086	      re_dfastate_t *dest_state;
3087	      int next_node = dfa->nexts[cur_node];
3088	      int next_idx = str_idx + naccepted;
3089	      dest_state = mctx->state_log[next_idx];
3090	      re_node_set_empty (&union_set);
3091	      if (dest_state)
3092		{
3093		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3094		  if (BE (err != REG_NOERROR, 0))
3095		    {
3096		      re_node_set_free (&union_set);
3097		      return err;
3098		    }
3099		}
3100	      result = re_node_set_insert (&union_set, next_node);
3101	      if (BE (result < 0, 0))
3102		{
3103		  re_node_set_free (&union_set);
3104		  return REG_ESPACE;
3105		}
3106	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3107							    &union_set);
3108	      if (BE (mctx->state_log[next_idx] == NULL
3109		      && err != REG_NOERROR, 0))
3110		{
3111		  re_node_set_free (&union_set);
3112		  return err;
3113		}
3114	    }
3115	}
3116#endif /* RE_ENABLE_I18N */
3117      if (naccepted
3118	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3119	{
3120	  result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3121	  if (BE (result < 0, 0))
3122	    {
3123	      re_node_set_free (&union_set);
3124	      return REG_ESPACE;
3125	    }
3126	}
3127    }
3128  re_node_set_free (&union_set);
3129  return REG_NOERROR;
3130}
3131
3132/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3133   CUR_NODES, however exclude the nodes which are:
3134    - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3135    - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3136*/
3137
3138static reg_errcode_t
3139internal_function
3140check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3141			  int ex_subexp, int type)
3142{
3143  reg_errcode_t err;
3144  int idx, outside_node;
3145  re_node_set new_nodes;
3146#ifdef DEBUG
3147  assert (cur_nodes->nelem);
3148#endif
3149  err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3150  if (BE (err != REG_NOERROR, 0))
3151    return err;
3152  /* Create a new node set NEW_NODES with the nodes which are epsilon
3153     closures of the node in CUR_NODES.  */
3154
3155  for (idx = 0; idx < cur_nodes->nelem; ++idx)
3156    {
3157      int cur_node = cur_nodes->elems[idx];
3158      const re_node_set *eclosure = dfa->eclosures + cur_node;
3159      outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3160      if (outside_node == -1)
3161	{
3162	  /* There are no problematic nodes, just merge them.  */
3163	  err = re_node_set_merge (&new_nodes, eclosure);
3164	  if (BE (err != REG_NOERROR, 0))
3165	    {
3166	      re_node_set_free (&new_nodes);
3167	      return err;
3168	    }
3169	}
3170      else
3171	{
3172	  /* There are problematic nodes, re-calculate incrementally.  */
3173	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3174					      ex_subexp, type);
3175	  if (BE (err != REG_NOERROR, 0))
3176	    {
3177	      re_node_set_free (&new_nodes);
3178	      return err;
3179	    }
3180	}
3181    }
3182  re_node_set_free (cur_nodes);
3183  *cur_nodes = new_nodes;
3184  return REG_NOERROR;
3185}
3186
3187/* Helper function for check_arrival_expand_ecl.
3188   Check incrementally the epsilon closure of TARGET, and if it isn't
3189   problematic append it to DST_NODES.  */
3190
3191static reg_errcode_t
3192internal_function __attribute_warn_unused_result__
3193check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3194			      int target, int ex_subexp, int type)
3195{
3196  int cur_node;
3197  for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3198    {
3199      int err;
3200
3201      if (dfa->nodes[cur_node].type == type
3202	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3203	{
3204	  if (type == OP_CLOSE_SUBEXP)
3205	    {
3206	      err = re_node_set_insert (dst_nodes, cur_node);
3207	      if (BE (err == -1, 0))
3208		return REG_ESPACE;
3209	    }
3210	  break;
3211	}
3212      err = re_node_set_insert (dst_nodes, cur_node);
3213      if (BE (err == -1, 0))
3214	return REG_ESPACE;
3215      if (dfa->edests[cur_node].nelem == 0)
3216	break;
3217      if (dfa->edests[cur_node].nelem == 2)
3218	{
3219	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3220					      dfa->edests[cur_node].elems[1],
3221					      ex_subexp, type);
3222	  if (BE (err != REG_NOERROR, 0))
3223	    return err;
3224	}
3225      cur_node = dfa->edests[cur_node].elems[0];
3226    }
3227  return REG_NOERROR;
3228}
3229
3230
3231/* For all the back references in the current state, calculate the
3232   destination of the back references by the appropriate entry
3233   in MCTX->BKREF_ENTS.  */
3234
3235static reg_errcode_t
3236internal_function __attribute_warn_unused_result__
3237expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3238		    int cur_str, int subexp_num, int type)
3239{
3240  const re_dfa_t *const dfa = mctx->dfa;
3241  reg_errcode_t err;
3242  int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3243  struct re_backref_cache_entry *ent;
3244
3245  if (cache_idx_start == -1)
3246    return REG_NOERROR;
3247
3248 restart:
3249  ent = mctx->bkref_ents + cache_idx_start;
3250  do
3251    {
3252      int to_idx, next_node;
3253
3254      /* Is this entry ENT is appropriate?  */
3255      if (!re_node_set_contains (cur_nodes, ent->node))
3256	continue; /* No.  */
3257
3258      to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3259      /* Calculate the destination of the back reference, and append it
3260	 to MCTX->STATE_LOG.  */
3261      if (to_idx == cur_str)
3262	{
3263	  /* The backreference did epsilon transit, we must re-check all the
3264	     node in the current state.  */
3265	  re_node_set new_dests;
3266	  reg_errcode_t err2, err3;
3267	  next_node = dfa->edests[ent->node].elems[0];
3268	  if (re_node_set_contains (cur_nodes, next_node))
3269	    continue;
3270	  err = re_node_set_init_1 (&new_dests, next_node);
3271	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3272	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3273	  re_node_set_free (&new_dests);
3274	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3275		  || err3 != REG_NOERROR, 0))
3276	    {
3277	      err = (err != REG_NOERROR ? err
3278		     : (err2 != REG_NOERROR ? err2 : err3));
3279	      return err;
3280	    }
3281	  /* TODO: It is still inefficient...  */
3282	  goto restart;
3283	}
3284      else
3285	{
3286	  re_node_set union_set;
3287	  next_node = dfa->nexts[ent->node];
3288	  if (mctx->state_log[to_idx])
3289	    {
3290	      int ret;
3291	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3292					next_node))
3293		continue;
3294	      err = re_node_set_init_copy (&union_set,
3295					   &mctx->state_log[to_idx]->nodes);
3296	      ret = re_node_set_insert (&union_set, next_node);
3297	      if (BE (err != REG_NOERROR || ret < 0, 0))
3298		{
3299		  re_node_set_free (&union_set);
3300		  err = err != REG_NOERROR ? err : REG_ESPACE;
3301		  return err;
3302		}
3303	    }
3304	  else
3305	    {
3306	      err = re_node_set_init_1 (&union_set, next_node);
3307	      if (BE (err != REG_NOERROR, 0))
3308		return err;
3309	    }
3310	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3311	  re_node_set_free (&union_set);
3312	  if (BE (mctx->state_log[to_idx] == NULL
3313		  && err != REG_NOERROR, 0))
3314	    return err;
3315	}
3316    }
3317  while (ent++->more);
3318  return REG_NOERROR;
3319}
3320
3321/* Build transition table for the state.
3322   Return 1 if succeeded, otherwise return NULL.  */
3323
3324static int
3325internal_function
3326build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3327{
3328  reg_errcode_t err;
3329  int i, j, ch, need_word_trtable = 0;
3330  bitset_word_t elem, mask;
3331  bool dests_node_malloced = false;
3332  bool dest_states_malloced = false;
3333  int ndests; /* Number of the destination states from `state'.  */
3334  re_dfastate_t **trtable;
3335  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3336  re_node_set follows, *dests_node;
3337  bitset_t *dests_ch;
3338  bitset_t acceptable;
3339
3340  struct dests_alloc
3341  {
3342    re_node_set dests_node[SBC_MAX];
3343    bitset_t dests_ch[SBC_MAX];
3344  } *dests_alloc;
3345
3346  /* We build DFA states which corresponds to the destination nodes
3347     from `state'.  `dests_node[i]' represents the nodes which i-th
3348     destination state contains, and `dests_ch[i]' represents the
3349     characters which i-th destination state accepts.  */
3350  if (__libc_use_alloca (sizeof (struct dests_alloc)))
3351    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3352  else
3353    {
3354      dests_alloc = re_malloc (struct dests_alloc, 1);
3355      if (BE (dests_alloc == NULL, 0))
3356	return 0;
3357      dests_node_malloced = true;
3358    }
3359  dests_node = dests_alloc->dests_node;
3360  dests_ch = dests_alloc->dests_ch;
3361
3362  /* Initialize transiton table.  */
3363  state->word_trtable = state->trtable = NULL;
3364
3365  /* At first, group all nodes belonging to `state' into several
3366     destinations.  */
3367  ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3368  if (BE (ndests <= 0, 0))
3369    {
3370      if (dests_node_malloced)
3371	free (dests_alloc);
3372      /* Return 0 in case of an error, 1 otherwise.  */
3373      if (ndests == 0)
3374	{
3375	  state->trtable = (re_dfastate_t **)
3376	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3377	  if (BE (state->trtable == NULL, 0))
3378	    return 0;
3379	  return 1;
3380	}
3381      return 0;
3382    }
3383
3384  err = re_node_set_alloc (&follows, ndests + 1);
3385  if (BE (err != REG_NOERROR, 0))
3386    goto out_free;
3387
3388  /* Avoid arithmetic overflow in size calculation.  */
3389  if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3390	    / (3 * sizeof (re_dfastate_t *)))
3391	   < ndests),
3392	  0))
3393    goto out_free;
3394
3395  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3396			 + ndests * 3 * sizeof (re_dfastate_t *)))
3397    dest_states = (re_dfastate_t **)
3398      alloca (ndests * 3 * sizeof (re_dfastate_t *));
3399  else
3400    {
3401      dest_states = (re_dfastate_t **)
3402	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3403      if (BE (dest_states == NULL, 0))
3404	{
3405out_free:
3406	  if (dest_states_malloced)
3407	    free (dest_states);
3408	  re_node_set_free (&follows);
3409	  for (i = 0; i < ndests; ++i)
3410	    re_node_set_free (dests_node + i);
3411	  if (dests_node_malloced)
3412	    free (dests_alloc);
3413	  return 0;
3414	}
3415      dest_states_malloced = true;
3416    }
3417  dest_states_word = dest_states + ndests;
3418  dest_states_nl = dest_states_word + ndests;
3419  bitset_empty (acceptable);
3420
3421  /* Then build the states for all destinations.  */
3422  for (i = 0; i < ndests; ++i)
3423    {
3424      int next_node;
3425      re_node_set_empty (&follows);
3426      /* Merge the follows of this destination states.  */
3427      for (j = 0; j < dests_node[i].nelem; ++j)
3428	{
3429	  next_node = dfa->nexts[dests_node[i].elems[j]];
3430	  if (next_node != -1)
3431	    {
3432	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3433	      if (BE (err != REG_NOERROR, 0))
3434		goto out_free;
3435	    }
3436	}
3437      dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3438      if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3439	goto out_free;
3440      /* If the new state has context constraint,
3441	 build appropriate states for these contexts.  */
3442      if (dest_states[i]->has_constraint)
3443	{
3444	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3445							  CONTEXT_WORD);
3446	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3447	    goto out_free;
3448
3449	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3450	    need_word_trtable = 1;
3451
3452	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3453							CONTEXT_NEWLINE);
3454	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3455	    goto out_free;
3456 	}
3457      else
3458	{
3459	  dest_states_word[i] = dest_states[i];
3460	  dest_states_nl[i] = dest_states[i];
3461	}
3462      bitset_merge (acceptable, dests_ch[i]);
3463    }
3464
3465  if (!BE (need_word_trtable, 0))
3466    {
3467      /* We don't care about whether the following character is a word
3468	 character, or we are in a single-byte character set so we can
3469	 discern by looking at the character code: allocate a
3470	 256-entry transition table.  */
3471      trtable = state->trtable =
3472	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3473      if (BE (trtable == NULL, 0))
3474	goto out_free;
3475
3476      /* For all characters ch...:  */
3477      for (i = 0; i < BITSET_WORDS; ++i)
3478	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3479	     elem;
3480	     mask <<= 1, elem >>= 1, ++ch)
3481	  if (BE (elem & 1, 0))
3482	    {
3483	      /* There must be exactly one destination which accepts
3484		 character ch.  See group_nodes_into_DFAstates.  */
3485	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3486		;
3487
3488	      /* j-th destination accepts the word character ch.  */
3489	      if (dfa->word_char[i] & mask)
3490		trtable[ch] = dest_states_word[j];
3491	      else
3492		trtable[ch] = dest_states[j];
3493	    }
3494    }
3495  else
3496    {
3497      /* We care about whether the following character is a word
3498	 character, and we are in a multi-byte character set: discern
3499	 by looking at the character code: build two 256-entry
3500	 transition tables, one starting at trtable[0] and one
3501	 starting at trtable[SBC_MAX].  */
3502      trtable = state->word_trtable =
3503	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3504      if (BE (trtable == NULL, 0))
3505	goto out_free;
3506
3507      /* For all characters ch...:  */
3508      for (i = 0; i < BITSET_WORDS; ++i)
3509	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3510	     elem;
3511	     mask <<= 1, elem >>= 1, ++ch)
3512	  if (BE (elem & 1, 0))
3513	    {
3514	      /* There must be exactly one destination which accepts
3515		 character ch.  See group_nodes_into_DFAstates.  */
3516	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3517		;
3518
3519	      /* j-th destination accepts the word character ch.  */
3520	      trtable[ch] = dest_states[j];
3521	      trtable[ch + SBC_MAX] = dest_states_word[j];
3522	    }
3523    }
3524
3525  /* new line */
3526  if (bitset_contain (acceptable, NEWLINE_CHAR))
3527    {
3528      /* The current state accepts newline character.  */
3529      for (j = 0; j < ndests; ++j)
3530	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3531	  {
3532	    /* k-th destination accepts newline character.  */
3533	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3534	    if (need_word_trtable)
3535	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3536	    /* There must be only one destination which accepts
3537	       newline.  See group_nodes_into_DFAstates.  */
3538	    break;
3539	  }
3540    }
3541
3542  if (dest_states_malloced)
3543    free (dest_states);
3544
3545  re_node_set_free (&follows);
3546  for (i = 0; i < ndests; ++i)
3547    re_node_set_free (dests_node + i);
3548
3549  if (dests_node_malloced)
3550    free (dests_alloc);
3551
3552  return 1;
3553}
3554
3555/* Group all nodes belonging to STATE into several destinations.
3556   Then for all destinations, set the nodes belonging to the destination
3557   to DESTS_NODE[i] and set the characters accepted by the destination
3558   to DEST_CH[i].  This function return the number of destinations.  */
3559
3560static int
3561internal_function
3562group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3563			    re_node_set *dests_node, bitset_t *dests_ch)
3564{
3565  reg_errcode_t err;
3566  int result;
3567  int i, j, k;
3568  int ndests; /* Number of the destinations from `state'.  */
3569  bitset_t accepts; /* Characters a node can accept.  */
3570  const re_node_set *cur_nodes = &state->nodes;
3571  bitset_empty (accepts);
3572  ndests = 0;
3573
3574  /* For all the nodes belonging to `state',  */
3575  for (i = 0; i < cur_nodes->nelem; ++i)
3576    {
3577      re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3578      re_token_type_t type = node->type;
3579      unsigned int constraint = node->constraint;
3580
3581      /* Enumerate all single byte character this node can accept.  */
3582      if (type == CHARACTER)
3583	bitset_set (accepts, node->opr.c);
3584      else if (type == SIMPLE_BRACKET)
3585	{
3586	  bitset_merge (accepts, node->opr.sbcset);
3587	}
3588      else if (type == OP_PERIOD)
3589	{
3590#ifdef RE_ENABLE_I18N
3591	  if (dfa->mb_cur_max > 1)
3592	    bitset_merge (accepts, dfa->sb_char);
3593	  else
3594#endif
3595	    bitset_set_all (accepts);
3596	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3597	    bitset_clear (accepts, '\n');
3598	  if (dfa->syntax & RE_DOT_NOT_NULL)
3599	    bitset_clear (accepts, '\0');
3600	}
3601#ifdef RE_ENABLE_I18N
3602      else if (type == OP_UTF8_PERIOD)
3603	{
3604	  memset (accepts, '\xff', sizeof (bitset_t) / 2);
3605	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3606	    bitset_clear (accepts, '\n');
3607	  if (dfa->syntax & RE_DOT_NOT_NULL)
3608	    bitset_clear (accepts, '\0');
3609	}
3610#endif
3611      else
3612	continue;
3613
3614      /* Check the `accepts' and sift the characters which are not
3615	 match it the context.  */
3616      if (constraint)
3617	{
3618	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3619	    {
3620	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3621	      bitset_empty (accepts);
3622	      if (accepts_newline)
3623		bitset_set (accepts, NEWLINE_CHAR);
3624	      else
3625		continue;
3626	    }
3627	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3628	    {
3629	      bitset_empty (accepts);
3630	      continue;
3631	    }
3632
3633	  if (constraint & NEXT_WORD_CONSTRAINT)
3634	    {
3635	      bitset_word_t any_set = 0;
3636	      if (type == CHARACTER && !node->word_char)
3637		{
3638		  bitset_empty (accepts);
3639		  continue;
3640		}
3641#ifdef RE_ENABLE_I18N
3642	      if (dfa->mb_cur_max > 1)
3643		for (j = 0; j < BITSET_WORDS; ++j)
3644		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3645	      else
3646#endif
3647		for (j = 0; j < BITSET_WORDS; ++j)
3648		  any_set |= (accepts[j] &= dfa->word_char[j]);
3649	      if (!any_set)
3650		continue;
3651	    }
3652	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3653	    {
3654	      bitset_word_t any_set = 0;
3655	      if (type == CHARACTER && node->word_char)
3656		{
3657		  bitset_empty (accepts);
3658		  continue;
3659		}
3660#ifdef RE_ENABLE_I18N
3661	      if (dfa->mb_cur_max > 1)
3662		for (j = 0; j < BITSET_WORDS; ++j)
3663		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3664	      else
3665#endif
3666		for (j = 0; j < BITSET_WORDS; ++j)
3667		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3668	      if (!any_set)
3669		continue;
3670	    }
3671	}
3672
3673      /* Then divide `accepts' into DFA states, or create a new
3674	 state.  Above, we make sure that accepts is not empty.  */
3675      for (j = 0; j < ndests; ++j)
3676	{
3677	  bitset_t intersec; /* Intersection sets, see below.  */
3678	  bitset_t remains;
3679	  /* Flags, see below.  */
3680	  bitset_word_t has_intersec, not_subset, not_consumed;
3681
3682	  /* Optimization, skip if this state doesn't accept the character.  */
3683	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3684	    continue;
3685
3686	  /* Enumerate the intersection set of this state and `accepts'.  */
3687	  has_intersec = 0;
3688	  for (k = 0; k < BITSET_WORDS; ++k)
3689	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3690	  /* And skip if the intersection set is empty.  */
3691	  if (!has_intersec)
3692	    continue;
3693
3694	  /* Then check if this state is a subset of `accepts'.  */
3695	  not_subset = not_consumed = 0;
3696	  for (k = 0; k < BITSET_WORDS; ++k)
3697	    {
3698	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3699	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3700	    }
3701
3702	  /* If this state isn't a subset of `accepts', create a
3703	     new group state, which has the `remains'. */
3704	  if (not_subset)
3705	    {
3706	      bitset_copy (dests_ch[ndests], remains);
3707	      bitset_copy (dests_ch[j], intersec);
3708	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3709	      if (BE (err != REG_NOERROR, 0))
3710		goto error_return;
3711	      ++ndests;
3712	    }
3713
3714	  /* Put the position in the current group. */
3715	  result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3716	  if (BE (result < 0, 0))
3717	    goto error_return;
3718
3719	  /* If all characters are consumed, go to next node. */
3720	  if (!not_consumed)
3721	    break;
3722	}
3723      /* Some characters remain, create a new group. */
3724      if (j == ndests)
3725	{
3726	  bitset_copy (dests_ch[ndests], accepts);
3727	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3728	  if (BE (err != REG_NOERROR, 0))
3729	    goto error_return;
3730	  ++ndests;
3731	  bitset_empty (accepts);
3732	}
3733    }
3734  return ndests;
3735 error_return:
3736  for (j = 0; j < ndests; ++j)
3737    re_node_set_free (dests_node + j);
3738  return -1;
3739}
3740
3741#ifdef RE_ENABLE_I18N
3742/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3743   Return the number of the bytes the node accepts.
3744   STR_IDX is the current index of the input string.
3745
3746   This function handles the nodes which can accept one character, or
3747   one collating element like '.', '[a-z]', opposite to the other nodes
3748   can only accept one byte.  */
3749
3750static int
3751internal_function
3752check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
3753			 const re_string_t *input, int str_idx)
3754{
3755  const re_token_t *node = dfa->nodes + node_idx;
3756  int char_len, elem_len;
3757  int i;
3758
3759  if (BE (node->type == OP_UTF8_PERIOD, 0))
3760    {
3761      unsigned char c = re_string_byte_at (input, str_idx), d;
3762      if (BE (c < 0xc2, 1))
3763	return 0;
3764
3765      if (str_idx + 2 > input->len)
3766	return 0;
3767
3768      d = re_string_byte_at (input, str_idx + 1);
3769      if (c < 0xe0)
3770	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3771      else if (c < 0xf0)
3772	{
3773	  char_len = 3;
3774	  if (c == 0xe0 && d < 0xa0)
3775	    return 0;
3776	}
3777      else if (c < 0xf8)
3778	{
3779	  char_len = 4;
3780	  if (c == 0xf0 && d < 0x90)
3781	    return 0;
3782	}
3783      else if (c < 0xfc)
3784	{
3785	  char_len = 5;
3786	  if (c == 0xf8 && d < 0x88)
3787	    return 0;
3788	}
3789      else if (c < 0xfe)
3790	{
3791	  char_len = 6;
3792	  if (c == 0xfc && d < 0x84)
3793	    return 0;
3794	}
3795      else
3796	return 0;
3797
3798      if (str_idx + char_len > input->len)
3799	return 0;
3800
3801      for (i = 1; i < char_len; ++i)
3802	{
3803	  d = re_string_byte_at (input, str_idx + i);
3804	  if (d < 0x80 || d > 0xbf)
3805	    return 0;
3806	}
3807      return char_len;
3808    }
3809
3810  char_len = re_string_char_size_at (input, str_idx);
3811  if (node->type == OP_PERIOD)
3812    {
3813      if (char_len <= 1)
3814	return 0;
3815      /* FIXME: I don't think this if is needed, as both '\n'
3816	 and '\0' are char_len == 1.  */
3817      /* '.' accepts any one character except the following two cases.  */
3818      if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3819	   re_string_byte_at (input, str_idx) == '\n') ||
3820	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3821	   re_string_byte_at (input, str_idx) == '\0'))
3822	return 0;
3823      return char_len;
3824    }
3825
3826  elem_len = re_string_elem_size_at (input, str_idx);
3827  if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3828    return 0;
3829
3830  if (node->type == COMPLEX_BRACKET)
3831    {
3832      const re_charset_t *cset = node->opr.mbcset;
3833# ifdef _LIBC
3834      const unsigned char *pin
3835	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3836      int j;
3837      uint32_t nrules;
3838# endif /* _LIBC */
3839      int match_len = 0;
3840      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3841		    ? re_string_wchar_at (input, str_idx) : 0);
3842
3843      /* match with multibyte character?  */
3844      for (i = 0; i < cset->nmbchars; ++i)
3845	if (wc == cset->mbchars[i])
3846	  {
3847	    match_len = char_len;
3848	    goto check_node_accept_bytes_match;
3849	  }
3850      /* match with character_class?  */
3851      for (i = 0; i < cset->nchar_classes; ++i)
3852	{
3853	  wctype_t wt = cset->char_classes[i];
3854	  if (__iswctype (wc, wt))
3855	    {
3856	      match_len = char_len;
3857	      goto check_node_accept_bytes_match;
3858	    }
3859	}
3860
3861# ifdef _LIBC
3862      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3863      if (nrules != 0)
3864	{
3865	  unsigned int in_collseq = 0;
3866	  const int32_t *table, *indirect;
3867	  const unsigned char *weights, *extra;
3868	  const char *collseqwc;
3869	  /* This #include defines a local function!  */
3870#  include <locale/weight.h>
3871
3872	  /* match with collating_symbol?  */
3873	  if (cset->ncoll_syms)
3874	    extra = (const unsigned char *)
3875	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3876	  for (i = 0; i < cset->ncoll_syms; ++i)
3877	    {
3878	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3879	      /* Compare the length of input collating element and
3880		 the length of current collating element.  */
3881	      if (*coll_sym != elem_len)
3882		continue;
3883	      /* Compare each bytes.  */
3884	      for (j = 0; j < *coll_sym; j++)
3885		if (pin[j] != coll_sym[1 + j])
3886		  break;
3887	      if (j == *coll_sym)
3888		{
3889		  /* Match if every bytes is equal.  */
3890		  match_len = j;
3891		  goto check_node_accept_bytes_match;
3892		}
3893	    }
3894
3895	  if (cset->nranges)
3896	    {
3897	      if (elem_len <= char_len)
3898		{
3899		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3900		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3901		}
3902	      else
3903		in_collseq = find_collation_sequence_value (pin, elem_len);
3904	    }
3905	  /* match with range expression?  */
3906	  for (i = 0; i < cset->nranges; ++i)
3907	    if (cset->range_starts[i] <= in_collseq
3908		&& in_collseq <= cset->range_ends[i])
3909	      {
3910		match_len = elem_len;
3911		goto check_node_accept_bytes_match;
3912	      }
3913
3914	  /* match with equivalence_class?  */
3915	  if (cset->nequiv_classes)
3916	    {
3917	      const unsigned char *cp = pin;
3918	      table = (const int32_t *)
3919		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3920	      weights = (const unsigned char *)
3921		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3922	      extra = (const unsigned char *)
3923		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3924	      indirect = (const int32_t *)
3925		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3926	      int32_t idx = findidx (&cp, elem_len);
3927	      if (idx > 0)
3928		for (i = 0; i < cset->nequiv_classes; ++i)
3929		  {
3930		    int32_t equiv_class_idx = cset->equiv_classes[i];
3931		    size_t weight_len = weights[idx & 0xffffff];
3932		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3933			&& (idx >> 24) == (equiv_class_idx >> 24))
3934		      {
3935			int cnt = 0;
3936
3937			idx &= 0xffffff;
3938			equiv_class_idx &= 0xffffff;
3939
3940			while (cnt <= weight_len
3941			       && (weights[equiv_class_idx + 1 + cnt]
3942				   == weights[idx + 1 + cnt]))
3943			  ++cnt;
3944			if (cnt > weight_len)
3945			  {
3946			    match_len = elem_len;
3947			    goto check_node_accept_bytes_match;
3948			  }
3949		      }
3950		  }
3951	    }
3952	}
3953      else
3954# endif /* _LIBC */
3955	{
3956	  /* match with range expression?  */
3957#if __GNUC__ >= 2
3958	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3959#else
3960	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3961	  cmp_buf[2] = wc;
3962#endif
3963	  for (i = 0; i < cset->nranges; ++i)
3964	    {
3965	      cmp_buf[0] = cset->range_starts[i];
3966	      cmp_buf[4] = cset->range_ends[i];
3967	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3968		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3969		{
3970		  match_len = char_len;
3971		  goto check_node_accept_bytes_match;
3972		}
3973	    }
3974	}
3975    check_node_accept_bytes_match:
3976      if (!cset->non_match)
3977	return match_len;
3978      else
3979	{
3980	  if (match_len > 0)
3981	    return 0;
3982	  else
3983	    return (elem_len > char_len) ? elem_len : char_len;
3984	}
3985    }
3986  return 0;
3987}
3988
3989# ifdef _LIBC
3990static unsigned int
3991internal_function
3992find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3993{
3994  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3995  if (nrules == 0)
3996    {
3997      if (mbs_len == 1)
3998	{
3999	  /* No valid character.  Match it as a single byte character.  */
4000	  const unsigned char *collseq = (const unsigned char *)
4001	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4002	  return collseq[mbs[0]];
4003	}
4004      return UINT_MAX;
4005    }
4006  else
4007    {
4008      int32_t idx;
4009      const unsigned char *extra = (const unsigned char *)
4010	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4011      int32_t extrasize = (const unsigned char *)
4012	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4013
4014      for (idx = 0; idx < extrasize;)
4015	{
4016	  int mbs_cnt, found = 0;
4017	  int32_t elem_mbs_len;
4018	  /* Skip the name of collating element name.  */
4019	  idx = idx + extra[idx] + 1;
4020	  elem_mbs_len = extra[idx++];
4021	  if (mbs_len == elem_mbs_len)
4022	    {
4023	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4024		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4025		  break;
4026	      if (mbs_cnt == elem_mbs_len)
4027		/* Found the entry.  */
4028		found = 1;
4029	    }
4030	  /* Skip the byte sequence of the collating element.  */
4031	  idx += elem_mbs_len;
4032	  /* Adjust for the alignment.  */
4033	  idx = (idx + 3) & ~3;
4034	  /* Skip the collation sequence value.  */
4035	  idx += sizeof (uint32_t);
4036	  /* Skip the wide char sequence of the collating element.  */
4037	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4038	  /* If we found the entry, return the sequence value.  */
4039	  if (found)
4040	    return *(uint32_t *) (extra + idx);
4041	  /* Skip the collation sequence value.  */
4042	  idx += sizeof (uint32_t);
4043	}
4044      return UINT_MAX;
4045    }
4046}
4047# endif /* _LIBC */
4048#endif /* RE_ENABLE_I18N */
4049
4050/* Check whether the node accepts the byte which is IDX-th
4051   byte of the INPUT.  */
4052
4053static int
4054internal_function
4055check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4056		   int idx)
4057{
4058  unsigned char ch;
4059  ch = re_string_byte_at (&mctx->input, idx);
4060  switch (node->type)
4061    {
4062    case CHARACTER:
4063      if (node->opr.c != ch)
4064	return 0;
4065      break;
4066
4067    case SIMPLE_BRACKET:
4068      if (!bitset_contain (node->opr.sbcset, ch))
4069	return 0;
4070      break;
4071
4072#ifdef RE_ENABLE_I18N
4073    case OP_UTF8_PERIOD:
4074      if (ch >= 0x80)
4075	return 0;
4076      /* FALLTHROUGH */
4077#endif
4078    case OP_PERIOD:
4079      if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4080	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4081	return 0;
4082      break;
4083
4084    default:
4085      return 0;
4086    }
4087
4088  if (node->constraint)
4089    {
4090      /* The node has constraints.  Check whether the current context
4091	 satisfies the constraints.  */
4092      unsigned int context = re_string_context_at (&mctx->input, idx,
4093						   mctx->eflags);
4094      if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4095	return 0;
4096    }
4097
4098  return 1;
4099}
4100
4101/* Extend the buffers, if the buffers have run out.  */
4102
4103static reg_errcode_t
4104internal_function __attribute_warn_unused_result__
4105extend_buffers (re_match_context_t *mctx, int min_len)
4106{
4107  reg_errcode_t ret;
4108  re_string_t *pstr = &mctx->input;
4109
4110  /* Avoid overflow.  */
4111  if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
4112    return REG_ESPACE;
4113
4114  /* Double the lengthes of the buffers, but allocate at least MIN_LEN.  */
4115  ret = re_string_realloc_buffers (pstr,
4116				   MAX (min_len,
4117					MIN (pstr->len, pstr->bufs_len * 2)));
4118  if (BE (ret != REG_NOERROR, 0))
4119    return ret;
4120
4121  if (mctx->state_log != NULL)
4122    {
4123      /* And double the length of state_log.  */
4124      /* XXX We have no indication of the size of this buffer.  If this
4125	 allocation fail we have no indication that the state_log array
4126	 does not have the right size.  */
4127      re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4128					      pstr->bufs_len + 1);
4129      if (BE (new_array == NULL, 0))
4130	return REG_ESPACE;
4131      mctx->state_log = new_array;
4132    }
4133
4134  /* Then reconstruct the buffers.  */
4135  if (pstr->icase)
4136    {
4137#ifdef RE_ENABLE_I18N
4138      if (pstr->mb_cur_max > 1)
4139	{
4140	  ret = build_wcs_upper_buffer (pstr);
4141	  if (BE (ret != REG_NOERROR, 0))
4142	    return ret;
4143	}
4144      else
4145#endif /* RE_ENABLE_I18N  */
4146	build_upper_buffer (pstr);
4147    }
4148  else
4149    {
4150#ifdef RE_ENABLE_I18N
4151      if (pstr->mb_cur_max > 1)
4152	build_wcs_buffer (pstr);
4153      else
4154#endif /* RE_ENABLE_I18N  */
4155	{
4156	  if (pstr->trans != NULL)
4157	    re_string_translate_buffer (pstr);
4158	}
4159    }
4160  return REG_NOERROR;
4161}
4162
4163
4164/* Functions for matching context.  */
4165
4166/* Initialize MCTX.  */
4167
4168static reg_errcode_t
4169internal_function __attribute_warn_unused_result__
4170match_ctx_init (re_match_context_t *mctx, int eflags, int n)
4171{
4172  mctx->eflags = eflags;
4173  mctx->match_last = -1;
4174  if (n > 0)
4175    {
4176      mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4177      mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4178      if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4179	return REG_ESPACE;
4180    }
4181  /* Already zero-ed by the caller.
4182     else
4183       mctx->bkref_ents = NULL;
4184     mctx->nbkref_ents = 0;
4185     mctx->nsub_tops = 0;  */
4186  mctx->abkref_ents = n;
4187  mctx->max_mb_elem_len = 1;
4188  mctx->asub_tops = n;
4189  return REG_NOERROR;
4190}
4191
4192/* Clean the entries which depend on the current input in MCTX.
4193   This function must be invoked when the matcher changes the start index
4194   of the input, or changes the input string.  */
4195
4196static void
4197internal_function
4198match_ctx_clean (re_match_context_t *mctx)
4199{
4200  int st_idx;
4201  for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4202    {
4203      int sl_idx;
4204      re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4205      for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4206	{
4207	  re_sub_match_last_t *last = top->lasts[sl_idx];
4208	  re_free (last->path.array);
4209	  re_free (last);
4210	}
4211      re_free (top->lasts);
4212      if (top->path)
4213	{
4214	  re_free (top->path->array);
4215	  re_free (top->path);
4216	}
4217      free (top);
4218    }
4219
4220  mctx->nsub_tops = 0;
4221  mctx->nbkref_ents = 0;
4222}
4223
4224/* Free all the memory associated with MCTX.  */
4225
4226static void
4227internal_function
4228match_ctx_free (re_match_context_t *mctx)
4229{
4230  /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4231  match_ctx_clean (mctx);
4232  re_free (mctx->sub_tops);
4233  re_free (mctx->bkref_ents);
4234}
4235
4236/* Add a new backreference entry to MCTX.
4237   Note that we assume that caller never call this function with duplicate
4238   entry, and call with STR_IDX which isn't smaller than any existing entry.
4239*/
4240
4241static reg_errcode_t
4242internal_function __attribute_warn_unused_result__
4243match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,
4244		     int to)
4245{
4246  if (mctx->nbkref_ents >= mctx->abkref_ents)
4247    {
4248      struct re_backref_cache_entry* new_entry;
4249      new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4250			      mctx->abkref_ents * 2);
4251      if (BE (new_entry == NULL, 0))
4252	{
4253	  re_free (mctx->bkref_ents);
4254	  return REG_ESPACE;
4255	}
4256      mctx->bkref_ents = new_entry;
4257      memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4258	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4259      mctx->abkref_ents *= 2;
4260    }
4261  if (mctx->nbkref_ents > 0
4262      && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4263    mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4264
4265  mctx->bkref_ents[mctx->nbkref_ents].node = node;
4266  mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4267  mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4268  mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4269
4270  /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4271     If bit N is clear, means that this entry won't epsilon-transition to
4272     an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4273     it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4274     such node.
4275
4276     A backreference does not epsilon-transition unless it is empty, so set
4277     to all zeros if FROM != TO.  */
4278  mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4279    = (from == to ? ~0 : 0);
4280
4281  mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4282  if (mctx->max_mb_elem_len < to - from)
4283    mctx->max_mb_elem_len = to - from;
4284  return REG_NOERROR;
4285}
4286
4287/* Search for the first entry which has the same str_idx, or -1 if none is
4288   found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4289
4290static int
4291internal_function
4292search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
4293{
4294  int left, right, mid, last;
4295  last = right = mctx->nbkref_ents;
4296  for (left = 0; left < right;)
4297    {
4298      mid = (left + right) / 2;
4299      if (mctx->bkref_ents[mid].str_idx < str_idx)
4300	left = mid + 1;
4301      else
4302	right = mid;
4303    }
4304  if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4305    return left;
4306  else
4307    return -1;
4308}
4309
4310/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4311   at STR_IDX.  */
4312
4313static reg_errcode_t
4314internal_function __attribute_warn_unused_result__
4315match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)
4316{
4317#ifdef DEBUG
4318  assert (mctx->sub_tops != NULL);
4319  assert (mctx->asub_tops > 0);
4320#endif
4321  if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4322    {
4323      int new_asub_tops = mctx->asub_tops * 2;
4324      re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4325						   re_sub_match_top_t *,
4326						   new_asub_tops);
4327      if (BE (new_array == NULL, 0))
4328	return REG_ESPACE;
4329      mctx->sub_tops = new_array;
4330      mctx->asub_tops = new_asub_tops;
4331    }
4332  mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4333  if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4334    return REG_ESPACE;
4335  mctx->sub_tops[mctx->nsub_tops]->node = node;
4336  mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4337  return REG_NOERROR;
4338}
4339
4340/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4341   at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4342
4343static re_sub_match_last_t *
4344internal_function
4345match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)
4346{
4347  re_sub_match_last_t *new_entry;
4348  if (BE (subtop->nlasts == subtop->alasts, 0))
4349    {
4350      int new_alasts = 2 * subtop->alasts + 1;
4351      re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4352						    re_sub_match_last_t *,
4353						    new_alasts);
4354      if (BE (new_array == NULL, 0))
4355	return NULL;
4356      subtop->lasts = new_array;
4357      subtop->alasts = new_alasts;
4358    }
4359  new_entry = calloc (1, sizeof (re_sub_match_last_t));
4360  if (BE (new_entry != NULL, 1))
4361    {
4362      subtop->lasts[subtop->nlasts] = new_entry;
4363      new_entry->node = node;
4364      new_entry->str_idx = str_idx;
4365      ++subtop->nlasts;
4366    }
4367  return new_entry;
4368}
4369
4370static void
4371internal_function
4372sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4373	       re_dfastate_t **limited_sts, int last_node, int last_str_idx)
4374{
4375  sctx->sifted_states = sifted_sts;
4376  sctx->limited_states = limited_sts;
4377  sctx->last_node = last_node;
4378  sctx->last_str_idx = last_str_idx;
4379  re_node_set_init_empty (&sctx->limits);
4380}
4381