1146040Stjr/* Extended regular expression matching and search library.
2250724Sjkim   Copyright (C) 2002-2006, 2010, 2011 Free Software Foundation, Inc.
3146040Stjr   This file is part of the GNU C Library.
4146040Stjr   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5146040Stjr
6146040Stjr   The GNU C Library is free software; you can redistribute it and/or
7146040Stjr   modify it under the terms of the GNU Lesser General Public
8146040Stjr   License as published by the Free Software Foundation; either
9146040Stjr   version 2.1 of the License, or (at your option) any later version.
10146040Stjr
11146040Stjr   The GNU C Library is distributed in the hope that it will be useful,
12146040Stjr   but WITHOUT ANY WARRANTY; without even the implied warranty of
13146040Stjr   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14146040Stjr   Lesser General Public License for more details.
15146040Stjr
16146040Stjr   You should have received a copy of the GNU Lesser General Public
17250724Sjkim   License along with the GNU C Library; if not, see
18250724Sjkim   <http://www.gnu.org/licenses/>.  */
19146040Stjr
20146040Stjrstatic void re_string_construct_common (const char *str, int len,
21146040Stjr					re_string_t *pstr,
22146040Stjr					RE_TRANSLATE_TYPE trans, int icase,
23146040Stjr					const re_dfa_t *dfa) internal_function;
24250724Sjkimstatic re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
25146040Stjr					  const re_node_set *nodes,
26146040Stjr					  unsigned int hash) internal_function;
27250724Sjkimstatic re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
28146040Stjr					  const re_node_set *nodes,
29146040Stjr					  unsigned int context,
30146040Stjr					  unsigned int hash) internal_function;
31146040Stjr
32146040Stjr/* Functions for string operation.  */
33146040Stjr
34146040Stjr/* This function allocate the buffers.  It is necessary to call
35146040Stjr   re_string_reconstruct before using the object.  */
36146040Stjr
37146040Stjrstatic reg_errcode_t
38250724Sjkiminternal_function __attribute_warn_unused_result__
39250724Sjkimre_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
40250724Sjkim		    RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
41146040Stjr{
42146040Stjr  reg_errcode_t ret;
43146040Stjr  int init_buf_len;
44146040Stjr
45146040Stjr  /* Ensure at least one character fits into the buffers.  */
46146040Stjr  if (init_len < dfa->mb_cur_max)
47146040Stjr    init_len = dfa->mb_cur_max;
48146040Stjr  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
49146040Stjr  re_string_construct_common (str, len, pstr, trans, icase, dfa);
50146040Stjr
51146040Stjr  ret = re_string_realloc_buffers (pstr, init_buf_len);
52146040Stjr  if (BE (ret != REG_NOERROR, 0))
53146040Stjr    return ret;
54146040Stjr
55146040Stjr  pstr->word_char = dfa->word_char;
56146040Stjr  pstr->word_ops_used = dfa->word_ops_used;
57146040Stjr  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
58146040Stjr  pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
59146040Stjr  pstr->valid_raw_len = pstr->valid_len;
60146040Stjr  return REG_NOERROR;
61146040Stjr}
62146040Stjr
63146040Stjr/* This function allocate the buffers, and initialize them.  */
64146040Stjr
65146040Stjrstatic reg_errcode_t
66250724Sjkiminternal_function __attribute_warn_unused_result__
67250724Sjkimre_string_construct (re_string_t *pstr, const char *str, int len,
68250724Sjkim		     RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
69146040Stjr{
70146040Stjr  reg_errcode_t ret;
71146040Stjr  memset (pstr, '\0', sizeof (re_string_t));
72146040Stjr  re_string_construct_common (str, len, pstr, trans, icase, dfa);
73146040Stjr
74146040Stjr  if (len > 0)
75146040Stjr    {
76146040Stjr      ret = re_string_realloc_buffers (pstr, len + 1);
77146040Stjr      if (BE (ret != REG_NOERROR, 0))
78146040Stjr	return ret;
79146040Stjr    }
80146040Stjr  pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
81146040Stjr
82146040Stjr  if (icase)
83146040Stjr    {
84146040Stjr#ifdef RE_ENABLE_I18N
85146040Stjr      if (dfa->mb_cur_max > 1)
86146040Stjr	{
87146040Stjr	  while (1)
88146040Stjr	    {
89146040Stjr	      ret = build_wcs_upper_buffer (pstr);
90146040Stjr	      if (BE (ret != REG_NOERROR, 0))
91146040Stjr		return ret;
92146040Stjr	      if (pstr->valid_raw_len >= len)
93146040Stjr		break;
94146040Stjr	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
95146040Stjr		break;
96146040Stjr	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
97146040Stjr	      if (BE (ret != REG_NOERROR, 0))
98146040Stjr		return ret;
99146040Stjr	    }
100146040Stjr	}
101146040Stjr      else
102146040Stjr#endif /* RE_ENABLE_I18N  */
103146040Stjr	build_upper_buffer (pstr);
104146040Stjr    }
105146040Stjr  else
106146040Stjr    {
107146040Stjr#ifdef RE_ENABLE_I18N
108146040Stjr      if (dfa->mb_cur_max > 1)
109146040Stjr	build_wcs_buffer (pstr);
110146040Stjr      else
111146040Stjr#endif /* RE_ENABLE_I18N  */
112146040Stjr	{
113146040Stjr	  if (trans != NULL)
114146040Stjr	    re_string_translate_buffer (pstr);
115146040Stjr	  else
116146040Stjr	    {
117146040Stjr	      pstr->valid_len = pstr->bufs_len;
118146040Stjr	      pstr->valid_raw_len = pstr->bufs_len;
119146040Stjr	    }
120146040Stjr	}
121146040Stjr    }
122146040Stjr
123146040Stjr  return REG_NOERROR;
124146040Stjr}
125146040Stjr
126146040Stjr/* Helper functions for re_string_allocate, and re_string_construct.  */
127146040Stjr
128146040Stjrstatic reg_errcode_t
129250724Sjkiminternal_function __attribute_warn_unused_result__
130250724Sjkimre_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
131146040Stjr{
132146040Stjr#ifdef RE_ENABLE_I18N
133146040Stjr  if (pstr->mb_cur_max > 1)
134146040Stjr    {
135250724Sjkim      wint_t *new_wcs;
136250724Sjkim
137250724Sjkim      /* Avoid overflow in realloc.  */
138250724Sjkim      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
139250724Sjkim      if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
140146040Stjr	return REG_ESPACE;
141250724Sjkim
142250724Sjkim      new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
143250724Sjkim      if (BE (new_wcs == NULL, 0))
144250724Sjkim	return REG_ESPACE;
145250724Sjkim      pstr->wcs = new_wcs;
146146040Stjr      if (pstr->offsets != NULL)
147146040Stjr	{
148250724Sjkim	  int *new_offsets = re_realloc (pstr->offsets, int, new_buf_len);
149250724Sjkim	  if (BE (new_offsets == NULL, 0))
150146040Stjr	    return REG_ESPACE;
151250724Sjkim	  pstr->offsets = new_offsets;
152146040Stjr	}
153146040Stjr    }
154146040Stjr#endif /* RE_ENABLE_I18N  */
155146040Stjr  if (pstr->mbs_allocated)
156146040Stjr    {
157250724Sjkim      unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
158250724Sjkim					   new_buf_len);
159250724Sjkim      if (BE (new_mbs == NULL, 0))
160146040Stjr	return REG_ESPACE;
161250724Sjkim      pstr->mbs = new_mbs;
162146040Stjr    }
163146040Stjr  pstr->bufs_len = new_buf_len;
164146040Stjr  return REG_NOERROR;
165146040Stjr}
166146040Stjr
167146040Stjr
168146040Stjrstatic void
169250724Sjkiminternal_function
170250724Sjkimre_string_construct_common (const char *str, int len, re_string_t *pstr,
171250724Sjkim			    RE_TRANSLATE_TYPE trans, int icase,
172250724Sjkim			    const re_dfa_t *dfa)
173146040Stjr{
174146040Stjr  pstr->raw_mbs = (const unsigned char *) str;
175146040Stjr  pstr->len = len;
176146040Stjr  pstr->raw_len = len;
177250724Sjkim  pstr->trans = trans;
178146040Stjr  pstr->icase = icase ? 1 : 0;
179146040Stjr  pstr->mbs_allocated = (trans != NULL || icase);
180146040Stjr  pstr->mb_cur_max = dfa->mb_cur_max;
181146040Stjr  pstr->is_utf8 = dfa->is_utf8;
182146040Stjr  pstr->map_notascii = dfa->map_notascii;
183146040Stjr  pstr->stop = pstr->len;
184146040Stjr  pstr->raw_stop = pstr->stop;
185146040Stjr}
186146040Stjr
187146040Stjr#ifdef RE_ENABLE_I18N
188146040Stjr
189146040Stjr/* Build wide character buffer PSTR->WCS.
190146040Stjr   If the byte sequence of the string are:
191146040Stjr     <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
192146040Stjr   Then wide character buffer will be:
193146040Stjr     <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
194146040Stjr   We use WEOF for padding, they indicate that the position isn't
195146040Stjr   a first byte of a multibyte character.
196146040Stjr
197146040Stjr   Note that this function assumes PSTR->VALID_LEN elements are already
198146040Stjr   built and starts from PSTR->VALID_LEN.  */
199146040Stjr
200146040Stjrstatic void
201250724Sjkiminternal_function
202250724Sjkimbuild_wcs_buffer (re_string_t *pstr)
203146040Stjr{
204146040Stjr#ifdef _LIBC
205250724Sjkim  unsigned char buf[MB_LEN_MAX];
206250724Sjkim  assert (MB_LEN_MAX >= pstr->mb_cur_max);
207146040Stjr#else
208146040Stjr  unsigned char buf[64];
209146040Stjr#endif
210146040Stjr  mbstate_t prev_st;
211146040Stjr  int byte_idx, end_idx, remain_len;
212146040Stjr  size_t mbclen;
213146040Stjr
214146040Stjr  /* Build the buffers from pstr->valid_len to either pstr->len or
215146040Stjr     pstr->bufs_len.  */
216146040Stjr  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
217146040Stjr  for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
218146040Stjr    {
219146040Stjr      wchar_t wc;
220146040Stjr      const char *p;
221146040Stjr
222146040Stjr      remain_len = end_idx - byte_idx;
223146040Stjr      prev_st = pstr->cur_state;
224146040Stjr      /* Apply the translation if we need.  */
225146040Stjr      if (BE (pstr->trans != NULL, 0))
226146040Stjr	{
227146040Stjr	  int i, ch;
228146040Stjr
229146040Stjr	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
230146040Stjr	    {
231146040Stjr	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
232146040Stjr	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
233146040Stjr	    }
234146040Stjr	  p = (const char *) buf;
235146040Stjr	}
236146040Stjr      else
237146040Stjr	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
238250724Sjkim      mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
239250724Sjkim      if (BE (mbclen == (size_t) -1 || mbclen == 0
240250724Sjkim	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
241146040Stjr	{
242146040Stjr	  /* We treat these cases as a singlebyte character.  */
243146040Stjr	  mbclen = 1;
244146040Stjr	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
245146040Stjr	  if (BE (pstr->trans != NULL, 0))
246146040Stjr	    wc = pstr->trans[wc];
247146040Stjr	  pstr->cur_state = prev_st;
248146040Stjr	}
249250724Sjkim      else if (BE (mbclen == (size_t) -2, 0))
250250724Sjkim	{
251250724Sjkim	  /* The buffer doesn't have enough space, finish to build.  */
252250724Sjkim	  pstr->cur_state = prev_st;
253250724Sjkim	  break;
254250724Sjkim	}
255146040Stjr
256146040Stjr      /* Write wide character and padding.  */
257146040Stjr      pstr->wcs[byte_idx++] = wc;
258146040Stjr      /* Write paddings.  */
259146040Stjr      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
260146040Stjr	pstr->wcs[byte_idx++] = WEOF;
261146040Stjr    }
262146040Stjr  pstr->valid_len = byte_idx;
263146040Stjr  pstr->valid_raw_len = byte_idx;
264146040Stjr}
265146040Stjr
266146040Stjr/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267146040Stjr   but for REG_ICASE.  */
268146040Stjr
269250724Sjkimstatic reg_errcode_t
270250724Sjkiminternal_function __attribute_warn_unused_result__
271250724Sjkimbuild_wcs_upper_buffer (re_string_t *pstr)
272146040Stjr{
273146040Stjr  mbstate_t prev_st;
274146040Stjr  int src_idx, byte_idx, end_idx, remain_len;
275146040Stjr  size_t mbclen;
276146040Stjr#ifdef _LIBC
277250724Sjkim  char buf[MB_LEN_MAX];
278250724Sjkim  assert (MB_LEN_MAX >= pstr->mb_cur_max);
279146040Stjr#else
280146040Stjr  char buf[64];
281146040Stjr#endif
282146040Stjr
283146040Stjr  byte_idx = pstr->valid_len;
284146040Stjr  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
285146040Stjr
286146040Stjr  /* The following optimization assumes that ASCII characters can be
287146040Stjr     mapped to wide characters with a simple cast.  */
288146040Stjr  if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
289146040Stjr    {
290146040Stjr      while (byte_idx < end_idx)
291146040Stjr	{
292146040Stjr	  wchar_t wc;
293146040Stjr
294146040Stjr	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295146040Stjr	      && mbsinit (&pstr->cur_state))
296146040Stjr	    {
297146040Stjr	      /* In case of a singlebyte character.  */
298146040Stjr	      pstr->mbs[byte_idx]
299146040Stjr		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300146040Stjr	      /* The next step uses the assumption that wchar_t is encoded
301146040Stjr		 ASCII-safe: all ASCII values can be converted like this.  */
302146040Stjr	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
303146040Stjr	      ++byte_idx;
304146040Stjr	      continue;
305146040Stjr	    }
306146040Stjr
307146040Stjr	  remain_len = end_idx - byte_idx;
308146040Stjr	  prev_st = pstr->cur_state;
309250724Sjkim	  mbclen = __mbrtowc (&wc,
310250724Sjkim			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311250724Sjkim			       + byte_idx), remain_len, &pstr->cur_state);
312146040Stjr	  if (BE (mbclen + 2 > 2, 1))
313146040Stjr	    {
314146040Stjr	      wchar_t wcu = wc;
315146040Stjr	      if (iswlower (wc))
316146040Stjr		{
317146040Stjr		  size_t mbcdlen;
318146040Stjr
319146040Stjr		  wcu = towupper (wc);
320146040Stjr		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
321146040Stjr		  if (BE (mbclen == mbcdlen, 1))
322146040Stjr		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
323146040Stjr		  else
324146040Stjr		    {
325146040Stjr		      src_idx = byte_idx;
326146040Stjr		      goto offsets_needed;
327146040Stjr		    }
328146040Stjr		}
329146040Stjr	      else
330146040Stjr		memcpy (pstr->mbs + byte_idx,
331146040Stjr			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
332146040Stjr	      pstr->wcs[byte_idx++] = wcu;
333146040Stjr	      /* Write paddings.  */
334146040Stjr	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335146040Stjr		pstr->wcs[byte_idx++] = WEOF;
336146040Stjr	    }
337250724Sjkim	  else if (mbclen == (size_t) -1 || mbclen == 0
338250724Sjkim		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
339146040Stjr	    {
340250724Sjkim	      /* It is an invalid character, an incomplete character
341250724Sjkim		 at the end of the string, or '\0'.  Just use the byte.  */
342146040Stjr	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
343146040Stjr	      pstr->mbs[byte_idx] = ch;
344146040Stjr	      /* And also cast it to wide char.  */
345146040Stjr	      pstr->wcs[byte_idx++] = (wchar_t) ch;
346146040Stjr	      if (BE (mbclen == (size_t) -1, 0))
347146040Stjr		pstr->cur_state = prev_st;
348146040Stjr	    }
349146040Stjr	  else
350146040Stjr	    {
351146040Stjr	      /* The buffer doesn't have enough space, finish to build.  */
352146040Stjr	      pstr->cur_state = prev_st;
353146040Stjr	      break;
354146040Stjr	    }
355146040Stjr	}
356146040Stjr      pstr->valid_len = byte_idx;
357146040Stjr      pstr->valid_raw_len = byte_idx;
358146040Stjr      return REG_NOERROR;
359146040Stjr    }
360146040Stjr  else
361146040Stjr    for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
362146040Stjr      {
363146040Stjr	wchar_t wc;
364146040Stjr	const char *p;
365146040Stjr      offsets_needed:
366146040Stjr	remain_len = end_idx - byte_idx;
367146040Stjr	prev_st = pstr->cur_state;
368146040Stjr	if (BE (pstr->trans != NULL, 0))
369146040Stjr	  {
370146040Stjr	    int i, ch;
371146040Stjr
372146040Stjr	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
373146040Stjr	      {
374146040Stjr		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
375146040Stjr		buf[i] = pstr->trans[ch];
376146040Stjr	      }
377146040Stjr	    p = (const char *) buf;
378146040Stjr	  }
379146040Stjr	else
380146040Stjr	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
381250724Sjkim	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
382146040Stjr	if (BE (mbclen + 2 > 2, 1))
383146040Stjr	  {
384146040Stjr	    wchar_t wcu = wc;
385146040Stjr	    if (iswlower (wc))
386146040Stjr	      {
387146040Stjr		size_t mbcdlen;
388146040Stjr
389146040Stjr		wcu = towupper (wc);
390146040Stjr		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
391146040Stjr		if (BE (mbclen == mbcdlen, 1))
392146040Stjr		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
393146040Stjr		else if (mbcdlen != (size_t) -1)
394146040Stjr		  {
395146040Stjr		    size_t i;
396146040Stjr
397146040Stjr		    if (byte_idx + mbcdlen > pstr->bufs_len)
398146040Stjr		      {
399146040Stjr			pstr->cur_state = prev_st;
400146040Stjr			break;
401146040Stjr		      }
402146040Stjr
403146040Stjr		    if (pstr->offsets == NULL)
404146040Stjr		      {
405146040Stjr			pstr->offsets = re_malloc (int, pstr->bufs_len);
406146040Stjr
407146040Stjr			if (pstr->offsets == NULL)
408146040Stjr			  return REG_ESPACE;
409146040Stjr		      }
410146040Stjr		    if (!pstr->offsets_needed)
411146040Stjr		      {
412146040Stjr			for (i = 0; i < (size_t) byte_idx; ++i)
413146040Stjr			  pstr->offsets[i] = i;
414146040Stjr			pstr->offsets_needed = 1;
415146040Stjr		      }
416146040Stjr
417146040Stjr		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
418146040Stjr		    pstr->wcs[byte_idx] = wcu;
419146040Stjr		    pstr->offsets[byte_idx] = src_idx;
420146040Stjr		    for (i = 1; i < mbcdlen; ++i)
421146040Stjr		      {
422146040Stjr			pstr->offsets[byte_idx + i]
423146040Stjr			  = src_idx + (i < mbclen ? i : mbclen - 1);
424146040Stjr			pstr->wcs[byte_idx + i] = WEOF;
425146040Stjr		      }
426146040Stjr		    pstr->len += mbcdlen - mbclen;
427146040Stjr		    if (pstr->raw_stop > src_idx)
428146040Stjr		      pstr->stop += mbcdlen - mbclen;
429146040Stjr		    end_idx = (pstr->bufs_len > pstr->len)
430146040Stjr			      ? pstr->len : pstr->bufs_len;
431146040Stjr		    byte_idx += mbcdlen;
432146040Stjr		    src_idx += mbclen;
433146040Stjr		    continue;
434146040Stjr		  }
435250724Sjkim		else
436250724Sjkim		  memcpy (pstr->mbs + byte_idx, p, mbclen);
437146040Stjr	      }
438146040Stjr	    else
439146040Stjr	      memcpy (pstr->mbs + byte_idx, p, mbclen);
440146040Stjr
441146040Stjr	    if (BE (pstr->offsets_needed != 0, 0))
442146040Stjr	      {
443146040Stjr		size_t i;
444146040Stjr		for (i = 0; i < mbclen; ++i)
445146040Stjr		  pstr->offsets[byte_idx + i] = src_idx + i;
446146040Stjr	      }
447146040Stjr	    src_idx += mbclen;
448146040Stjr
449146040Stjr	    pstr->wcs[byte_idx++] = wcu;
450146040Stjr	    /* Write paddings.  */
451146040Stjr	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
452146040Stjr	      pstr->wcs[byte_idx++] = WEOF;
453146040Stjr	  }
454250724Sjkim	else if (mbclen == (size_t) -1 || mbclen == 0
455250724Sjkim		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
456146040Stjr	  {
457146040Stjr	    /* It is an invalid character or '\0'.  Just use the byte.  */
458146040Stjr	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
459146040Stjr
460146040Stjr	    if (BE (pstr->trans != NULL, 0))
461146040Stjr	      ch = pstr->trans [ch];
462146040Stjr	    pstr->mbs[byte_idx] = ch;
463146040Stjr
464146040Stjr	    if (BE (pstr->offsets_needed != 0, 0))
465146040Stjr	      pstr->offsets[byte_idx] = src_idx;
466146040Stjr	    ++src_idx;
467146040Stjr
468146040Stjr	    /* And also cast it to wide char.  */
469146040Stjr	    pstr->wcs[byte_idx++] = (wchar_t) ch;
470146040Stjr	    if (BE (mbclen == (size_t) -1, 0))
471146040Stjr	      pstr->cur_state = prev_st;
472146040Stjr	  }
473146040Stjr	else
474146040Stjr	  {
475146040Stjr	    /* The buffer doesn't have enough space, finish to build.  */
476146040Stjr	    pstr->cur_state = prev_st;
477146040Stjr	    break;
478146040Stjr	  }
479146040Stjr      }
480146040Stjr  pstr->valid_len = byte_idx;
481146040Stjr  pstr->valid_raw_len = src_idx;
482146040Stjr  return REG_NOERROR;
483146040Stjr}
484146040Stjr
485146040Stjr/* Skip characters until the index becomes greater than NEW_RAW_IDX.
486146040Stjr   Return the index.  */
487146040Stjr
488146040Stjrstatic int
489250724Sjkiminternal_function
490250724Sjkimre_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
491146040Stjr{
492146040Stjr  mbstate_t prev_st;
493146040Stjr  int rawbuf_idx;
494146040Stjr  size_t mbclen;
495250724Sjkim  wint_t wc = WEOF;
496146040Stjr
497146040Stjr  /* Skip the characters which are not necessary to check.  */
498146040Stjr  for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
499146040Stjr       rawbuf_idx < new_raw_idx;)
500146040Stjr    {
501250724Sjkim      wchar_t wc2;
502250724Sjkim      int remain_len = pstr->raw_len - rawbuf_idx;
503146040Stjr      prev_st = pstr->cur_state;
504250724Sjkim      mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
505250724Sjkim			  remain_len, &pstr->cur_state);
506250724Sjkim      if (BE ((ssize_t) mbclen <= 0, 0))
507146040Stjr	{
508250724Sjkim	  /* We treat these cases as a single byte character.  */
509250724Sjkim	  if (mbclen == 0 || remain_len == 0)
510250724Sjkim	    wc = L'\0';
511250724Sjkim	  else
512250724Sjkim	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
513146040Stjr	  mbclen = 1;
514146040Stjr	  pstr->cur_state = prev_st;
515146040Stjr	}
516250724Sjkim      else
517250724Sjkim	wc = (wint_t) wc2;
518146040Stjr      /* Then proceed the next character.  */
519146040Stjr      rawbuf_idx += mbclen;
520146040Stjr    }
521250724Sjkim  *last_wc = wc;
522146040Stjr  return rawbuf_idx;
523146040Stjr}
524146040Stjr#endif /* RE_ENABLE_I18N  */
525146040Stjr
526146040Stjr/* Build the buffer PSTR->MBS, and apply the translation if we need.
527146040Stjr   This function is used in case of REG_ICASE.  */
528146040Stjr
529146040Stjrstatic void
530250724Sjkiminternal_function
531250724Sjkimbuild_upper_buffer (re_string_t *pstr)
532146040Stjr{
533146040Stjr  int char_idx, end_idx;
534146040Stjr  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
535146040Stjr
536146040Stjr  for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
537146040Stjr    {
538146040Stjr      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
539146040Stjr      if (BE (pstr->trans != NULL, 0))
540146040Stjr	ch = pstr->trans[ch];
541146040Stjr      if (islower (ch))
542146040Stjr	pstr->mbs[char_idx] = toupper (ch);
543146040Stjr      else
544146040Stjr	pstr->mbs[char_idx] = ch;
545146040Stjr    }
546146040Stjr  pstr->valid_len = char_idx;
547146040Stjr  pstr->valid_raw_len = char_idx;
548146040Stjr}
549146040Stjr
550146040Stjr/* Apply TRANS to the buffer in PSTR.  */
551146040Stjr
552146040Stjrstatic void
553250724Sjkiminternal_function
554250724Sjkimre_string_translate_buffer (re_string_t *pstr)
555146040Stjr{
556146040Stjr  int buf_idx, end_idx;
557146040Stjr  end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
558146040Stjr
559146040Stjr  for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
560146040Stjr    {
561146040Stjr      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
562146040Stjr      pstr->mbs[buf_idx] = pstr->trans[ch];
563146040Stjr    }
564146040Stjr
565146040Stjr  pstr->valid_len = buf_idx;
566146040Stjr  pstr->valid_raw_len = buf_idx;
567146040Stjr}
568146040Stjr
569146040Stjr/* This function re-construct the buffers.
570146040Stjr   Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
571146040Stjr   convert to upper case in case of REG_ICASE, apply translation.  */
572146040Stjr
573146040Stjrstatic reg_errcode_t
574250724Sjkiminternal_function __attribute_warn_unused_result__
575250724Sjkimre_string_reconstruct (re_string_t *pstr, int idx, int eflags)
576146040Stjr{
577146040Stjr  int offset = idx - pstr->raw_mbs_idx;
578146040Stjr  if (BE (offset < 0, 0))
579146040Stjr    {
580146040Stjr      /* Reset buffer.  */
581146040Stjr#ifdef RE_ENABLE_I18N
582146040Stjr      if (pstr->mb_cur_max > 1)
583146040Stjr	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
584146040Stjr#endif /* RE_ENABLE_I18N */
585146040Stjr      pstr->len = pstr->raw_len;
586146040Stjr      pstr->stop = pstr->raw_stop;
587146040Stjr      pstr->valid_len = 0;
588146040Stjr      pstr->raw_mbs_idx = 0;
589146040Stjr      pstr->valid_raw_len = 0;
590146040Stjr      pstr->offsets_needed = 0;
591146040Stjr      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
592146040Stjr			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
593146040Stjr      if (!pstr->mbs_allocated)
594146040Stjr	pstr->mbs = (unsigned char *) pstr->raw_mbs;
595146040Stjr      offset = idx;
596146040Stjr    }
597146040Stjr
598146040Stjr  if (BE (offset != 0, 1))
599146040Stjr    {
600250724Sjkim      /* Should the already checked characters be kept?  */
601250724Sjkim      if (BE (offset < pstr->valid_raw_len, 1))
602146040Stjr	{
603146040Stjr	  /* Yes, move them to the front of the buffer.  */
604146040Stjr#ifdef RE_ENABLE_I18N
605250724Sjkim	  if (BE (pstr->offsets_needed, 0))
606250724Sjkim	    {
607250724Sjkim	      int low = 0, high = pstr->valid_len, mid;
608250724Sjkim	      do
609250724Sjkim		{
610250724Sjkim		  mid = (high + low) / 2;
611250724Sjkim		  if (pstr->offsets[mid] > offset)
612250724Sjkim		    high = mid;
613250724Sjkim		  else if (pstr->offsets[mid] < offset)
614250724Sjkim		    low = mid + 1;
615250724Sjkim		  else
616250724Sjkim		    break;
617250724Sjkim		}
618250724Sjkim	      while (low < high);
619250724Sjkim	      if (pstr->offsets[mid] < offset)
620250724Sjkim		++mid;
621250724Sjkim	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
622250724Sjkim							eflags);
623250724Sjkim	      /* This can be quite complicated, so handle specially
624250724Sjkim		 only the common and easy case where the character with
625250724Sjkim		 different length representation of lower and upper
626250724Sjkim		 case is present at or after offset.  */
627250724Sjkim	      if (pstr->valid_len > offset
628250724Sjkim		  && mid == offset && pstr->offsets[mid] == offset)
629250724Sjkim		{
630250724Sjkim		  memmove (pstr->wcs, pstr->wcs + offset,
631250724Sjkim			   (pstr->valid_len - offset) * sizeof (wint_t));
632250724Sjkim		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
633250724Sjkim		  pstr->valid_len -= offset;
634250724Sjkim		  pstr->valid_raw_len -= offset;
635250724Sjkim		  for (low = 0; low < pstr->valid_len; low++)
636250724Sjkim		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
637250724Sjkim		}
638250724Sjkim	      else
639250724Sjkim		{
640250724Sjkim		  /* Otherwise, just find out how long the partial multibyte
641250724Sjkim		     character at offset is and fill it with WEOF/255.  */
642250724Sjkim		  pstr->len = pstr->raw_len - idx + offset;
643250724Sjkim		  pstr->stop = pstr->raw_stop - idx + offset;
644250724Sjkim		  pstr->offsets_needed = 0;
645250724Sjkim		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
646250724Sjkim		    --mid;
647250724Sjkim		  while (mid < pstr->valid_len)
648250724Sjkim		    if (pstr->wcs[mid] != WEOF)
649250724Sjkim		      break;
650250724Sjkim		    else
651250724Sjkim		      ++mid;
652250724Sjkim		  if (mid == pstr->valid_len)
653250724Sjkim		    pstr->valid_len = 0;
654250724Sjkim		  else
655250724Sjkim		    {
656250724Sjkim		      pstr->valid_len = pstr->offsets[mid] - offset;
657250724Sjkim		      if (pstr->valid_len)
658250724Sjkim			{
659250724Sjkim			  for (low = 0; low < pstr->valid_len; ++low)
660250724Sjkim			    pstr->wcs[low] = WEOF;
661250724Sjkim			  memset (pstr->mbs, 255, pstr->valid_len);
662250724Sjkim			}
663250724Sjkim		    }
664250724Sjkim		  pstr->valid_raw_len = pstr->valid_len;
665250724Sjkim		}
666250724Sjkim	    }
667250724Sjkim	  else
668250724Sjkim#endif
669250724Sjkim	    {
670250724Sjkim	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
671250724Sjkim							eflags);
672250724Sjkim#ifdef RE_ENABLE_I18N
673250724Sjkim	      if (pstr->mb_cur_max > 1)
674250724Sjkim		memmove (pstr->wcs, pstr->wcs + offset,
675250724Sjkim			 (pstr->valid_len - offset) * sizeof (wint_t));
676146040Stjr#endif /* RE_ENABLE_I18N */
677250724Sjkim	      if (BE (pstr->mbs_allocated, 0))
678250724Sjkim		memmove (pstr->mbs, pstr->mbs + offset,
679250724Sjkim			 pstr->valid_len - offset);
680250724Sjkim	      pstr->valid_len -= offset;
681250724Sjkim	      pstr->valid_raw_len -= offset;
682146040Stjr#if DEBUG
683250724Sjkim	      assert (pstr->valid_len > 0);
684146040Stjr#endif
685250724Sjkim	    }
686146040Stjr	}
687146040Stjr      else
688146040Stjr	{
689146040Stjr	  /* No, skip all characters until IDX.  */
690250724Sjkim	  int prev_valid_len = pstr->valid_len;
691250724Sjkim
692146040Stjr#ifdef RE_ENABLE_I18N
693146040Stjr	  if (BE (pstr->offsets_needed, 0))
694146040Stjr	    {
695146040Stjr	      pstr->len = pstr->raw_len - idx + offset;
696146040Stjr	      pstr->stop = pstr->raw_stop - idx + offset;
697146040Stjr	      pstr->offsets_needed = 0;
698146040Stjr	    }
699146040Stjr#endif
700146040Stjr	  pstr->valid_len = 0;
701146040Stjr#ifdef RE_ENABLE_I18N
702146040Stjr	  if (pstr->mb_cur_max > 1)
703146040Stjr	    {
704146040Stjr	      int wcs_idx;
705146040Stjr	      wint_t wc = WEOF;
706146040Stjr
707146040Stjr	      if (pstr->is_utf8)
708146040Stjr		{
709250724Sjkim		  const unsigned char *raw, *p, *end;
710146040Stjr
711146040Stjr		  /* Special case UTF-8.  Multi-byte chars start with any
712146040Stjr		     byte other than 0x80 - 0xbf.  */
713146040Stjr		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
714146040Stjr		  end = raw + (offset - pstr->mb_cur_max);
715250724Sjkim		  if (end < pstr->raw_mbs)
716250724Sjkim		    end = pstr->raw_mbs;
717250724Sjkim		  p = raw + offset - 1;
718250724Sjkim#ifdef _LIBC
719250724Sjkim		  /* We know the wchar_t encoding is UCS4, so for the simple
720250724Sjkim		     case, ASCII characters, skip the conversion step.  */
721250724Sjkim		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
722250724Sjkim		    {
723250724Sjkim		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
724250724Sjkim		      /* pstr->valid_len = 0; */
725250724Sjkim		      wc = (wchar_t) *p;
726250724Sjkim		    }
727250724Sjkim		  else
728250724Sjkim#endif
729250724Sjkim		    for (; p >= end; --p)
730250724Sjkim		      if ((*p & 0xc0) != 0x80)
731250724Sjkim			{
732250724Sjkim			  mbstate_t cur_state;
733250724Sjkim			  wchar_t wc2;
734250724Sjkim			  int mlen = raw + pstr->len - p;
735250724Sjkim			  unsigned char buf[6];
736250724Sjkim			  size_t mbclen;
737146040Stjr
738250724Sjkim			  const unsigned char *pp = p;
739250724Sjkim			  if (BE (pstr->trans != NULL, 0))
740250724Sjkim			    {
741250724Sjkim			      int i = mlen < 6 ? mlen : 6;
742250724Sjkim			      while (--i >= 0)
743250724Sjkim				buf[i] = pstr->trans[p[i]];
744250724Sjkim			      pp = buf;
745250724Sjkim			    }
746250724Sjkim			  /* XXX Don't use mbrtowc, we know which conversion
747250724Sjkim			     to use (UTF-8 -> UCS4).  */
748250724Sjkim			  memset (&cur_state, 0, sizeof (cur_state));
749250724Sjkim			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
750250724Sjkim					      &cur_state);
751250724Sjkim			  if (raw + offset - p <= mbclen
752250724Sjkim			      && mbclen < (size_t) -2)
753250724Sjkim			    {
754250724Sjkim			      memset (&pstr->cur_state, '\0',
755250724Sjkim				      sizeof (mbstate_t));
756250724Sjkim			      pstr->valid_len = mbclen - (raw + offset - p);
757250724Sjkim			      wc = wc2;
758250724Sjkim			    }
759250724Sjkim			  break;
760250724Sjkim			}
761146040Stjr		}
762146040Stjr
763146040Stjr	      if (wc == WEOF)
764146040Stjr		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
765250724Sjkim	      if (wc == WEOF)
766250724Sjkim		pstr->tip_context
767250724Sjkim		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
768250724Sjkim	      else
769250724Sjkim		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
770250724Sjkim				      && IS_WIDE_WORD_CHAR (wc))
771250724Sjkim				     ? CONTEXT_WORD
772250724Sjkim				     : ((IS_WIDE_NEWLINE (wc)
773250724Sjkim					 && pstr->newline_anchor)
774250724Sjkim					? CONTEXT_NEWLINE : 0));
775146040Stjr	      if (BE (pstr->valid_len, 0))
776146040Stjr		{
777146040Stjr		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
778146040Stjr		    pstr->wcs[wcs_idx] = WEOF;
779146040Stjr		  if (pstr->mbs_allocated)
780146040Stjr		    memset (pstr->mbs, 255, pstr->valid_len);
781146040Stjr		}
782146040Stjr	      pstr->valid_raw_len = pstr->valid_len;
783146040Stjr	    }
784146040Stjr	  else
785146040Stjr#endif /* RE_ENABLE_I18N */
786146040Stjr	    {
787146040Stjr	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
788250724Sjkim	      pstr->valid_raw_len = 0;
789146040Stjr	      if (pstr->trans)
790146040Stjr		c = pstr->trans[c];
791146040Stjr	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
792146040Stjr				   ? CONTEXT_WORD
793146040Stjr				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
794146040Stjr				      ? CONTEXT_NEWLINE : 0));
795146040Stjr	    }
796146040Stjr	}
797146040Stjr      if (!BE (pstr->mbs_allocated, 0))
798146040Stjr	pstr->mbs += offset;
799146040Stjr    }
800146040Stjr  pstr->raw_mbs_idx = idx;
801146040Stjr  pstr->len -= offset;
802146040Stjr  pstr->stop -= offset;
803146040Stjr
804146040Stjr  /* Then build the buffers.  */
805146040Stjr#ifdef RE_ENABLE_I18N
806146040Stjr  if (pstr->mb_cur_max > 1)
807146040Stjr    {
808146040Stjr      if (pstr->icase)
809146040Stjr	{
810250724Sjkim	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
811146040Stjr	  if (BE (ret != REG_NOERROR, 0))
812146040Stjr	    return ret;
813146040Stjr	}
814146040Stjr      else
815146040Stjr	build_wcs_buffer (pstr);
816146040Stjr    }
817146040Stjr  else
818146040Stjr#endif /* RE_ENABLE_I18N */
819250724Sjkim    if (BE (pstr->mbs_allocated, 0))
820250724Sjkim      {
821250724Sjkim	if (pstr->icase)
822250724Sjkim	  build_upper_buffer (pstr);
823250724Sjkim	else if (pstr->trans != NULL)
824250724Sjkim	  re_string_translate_buffer (pstr);
825250724Sjkim      }
826250724Sjkim    else
827250724Sjkim      pstr->valid_len = pstr->len;
828146040Stjr
829146040Stjr  pstr->cur_idx = 0;
830146040Stjr  return REG_NOERROR;
831146040Stjr}
832146040Stjr
833146040Stjrstatic unsigned char
834250724Sjkiminternal_function __attribute ((pure))
835250724Sjkimre_string_peek_byte_case (const re_string_t *pstr, int idx)
836146040Stjr{
837146040Stjr  int ch, off;
838146040Stjr
839146040Stjr  /* Handle the common (easiest) cases first.  */
840146040Stjr  if (BE (!pstr->mbs_allocated, 1))
841146040Stjr    return re_string_peek_byte (pstr, idx);
842146040Stjr
843146040Stjr#ifdef RE_ENABLE_I18N
844146040Stjr  if (pstr->mb_cur_max > 1
845146040Stjr      && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
846146040Stjr    return re_string_peek_byte (pstr, idx);
847146040Stjr#endif
848146040Stjr
849146040Stjr  off = pstr->cur_idx + idx;
850146040Stjr#ifdef RE_ENABLE_I18N
851146040Stjr  if (pstr->offsets_needed)
852146040Stjr    off = pstr->offsets[off];
853146040Stjr#endif
854146040Stjr
855146040Stjr  ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
856146040Stjr
857146040Stjr#ifdef RE_ENABLE_I18N
858146040Stjr  /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
859146040Stjr     this function returns CAPITAL LETTER I instead of first byte of
860146040Stjr     DOTLESS SMALL LETTER I.  The latter would confuse the parser,
861146040Stjr     since peek_byte_case doesn't advance cur_idx in any way.  */
862146040Stjr  if (pstr->offsets_needed && !isascii (ch))
863146040Stjr    return re_string_peek_byte (pstr, idx);
864146040Stjr#endif
865146040Stjr
866146040Stjr  return ch;
867146040Stjr}
868146040Stjr
869146040Stjrstatic unsigned char
870250724Sjkiminternal_function
871250724Sjkimre_string_fetch_byte_case (re_string_t *pstr)
872146040Stjr{
873146040Stjr  if (BE (!pstr->mbs_allocated, 1))
874146040Stjr    return re_string_fetch_byte (pstr);
875146040Stjr
876146040Stjr#ifdef RE_ENABLE_I18N
877146040Stjr  if (pstr->offsets_needed)
878146040Stjr    {
879146040Stjr      int off, ch;
880146040Stjr
881146040Stjr      /* For tr_TR.UTF-8 [[:islower:]] there is
882146040Stjr	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
883146040Stjr	 in that case the whole multi-byte character and return
884146040Stjr	 the original letter.  On the other side, with
885146040Stjr	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
886146040Stjr	 anything else would complicate things too much.  */
887146040Stjr
888146040Stjr      if (!re_string_first_byte (pstr, pstr->cur_idx))
889146040Stjr	return re_string_fetch_byte (pstr);
890146040Stjr
891146040Stjr      off = pstr->offsets[pstr->cur_idx];
892146040Stjr      ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
893146040Stjr
894146040Stjr      if (! isascii (ch))
895146040Stjr	return re_string_fetch_byte (pstr);
896146040Stjr
897146040Stjr      re_string_skip_bytes (pstr,
898146040Stjr			    re_string_char_size_at (pstr, pstr->cur_idx));
899146040Stjr      return ch;
900146040Stjr    }
901146040Stjr#endif
902146040Stjr
903146040Stjr  return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
904146040Stjr}
905146040Stjr
906146040Stjrstatic void
907250724Sjkiminternal_function
908250724Sjkimre_string_destruct (re_string_t *pstr)
909146040Stjr{
910146040Stjr#ifdef RE_ENABLE_I18N
911146040Stjr  re_free (pstr->wcs);
912146040Stjr  re_free (pstr->offsets);
913146040Stjr#endif /* RE_ENABLE_I18N  */
914146040Stjr  if (pstr->mbs_allocated)
915146040Stjr    re_free (pstr->mbs);
916146040Stjr}
917146040Stjr
918146040Stjr/* Return the context at IDX in INPUT.  */
919146040Stjr
920146040Stjrstatic unsigned int
921250724Sjkiminternal_function
922250724Sjkimre_string_context_at (const re_string_t *input, int idx, int eflags)
923146040Stjr{
924146040Stjr  int c;
925146040Stjr  if (BE (idx < 0, 0))
926146040Stjr    /* In this case, we use the value stored in input->tip_context,
927146040Stjr       since we can't know the character in input->mbs[-1] here.  */
928146040Stjr    return input->tip_context;
929146040Stjr  if (BE (idx == input->len, 0))
930146040Stjr    return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
931146040Stjr	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
932146040Stjr#ifdef RE_ENABLE_I18N
933146040Stjr  if (input->mb_cur_max > 1)
934146040Stjr    {
935146040Stjr      wint_t wc;
936146040Stjr      int wc_idx = idx;
937146040Stjr      while(input->wcs[wc_idx] == WEOF)
938146040Stjr	{
939146040Stjr#ifdef DEBUG
940146040Stjr	  /* It must not happen.  */
941146040Stjr	  assert (wc_idx >= 0);
942146040Stjr#endif
943146040Stjr	  --wc_idx;
944146040Stjr	  if (wc_idx < 0)
945146040Stjr	    return input->tip_context;
946146040Stjr	}
947146040Stjr      wc = input->wcs[wc_idx];
948146040Stjr      if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
949146040Stjr	return CONTEXT_WORD;
950146040Stjr      return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
951146040Stjr	      ? CONTEXT_NEWLINE : 0);
952146040Stjr    }
953146040Stjr  else
954146040Stjr#endif
955146040Stjr    {
956146040Stjr      c = re_string_byte_at (input, idx);
957146040Stjr      if (bitset_contain (input->word_char, c))
958146040Stjr	return CONTEXT_WORD;
959146040Stjr      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
960146040Stjr    }
961146040Stjr}
962146040Stjr
963146040Stjr/* Functions for set operation.  */
964146040Stjr
965146040Stjrstatic reg_errcode_t
966250724Sjkiminternal_function __attribute_warn_unused_result__
967250724Sjkimre_node_set_alloc (re_node_set *set, int size)
968146040Stjr{
969146040Stjr  set->alloc = size;
970146040Stjr  set->nelem = 0;
971146040Stjr  set->elems = re_malloc (int, size);
972146040Stjr  if (BE (set->elems == NULL, 0))
973146040Stjr    return REG_ESPACE;
974146040Stjr  return REG_NOERROR;
975146040Stjr}
976146040Stjr
977146040Stjrstatic reg_errcode_t
978250724Sjkiminternal_function __attribute_warn_unused_result__
979250724Sjkimre_node_set_init_1 (re_node_set *set, int elem)
980146040Stjr{
981146040Stjr  set->alloc = 1;
982146040Stjr  set->nelem = 1;
983146040Stjr  set->elems = re_malloc (int, 1);
984146040Stjr  if (BE (set->elems == NULL, 0))
985146040Stjr    {
986146040Stjr      set->alloc = set->nelem = 0;
987146040Stjr      return REG_ESPACE;
988146040Stjr    }
989146040Stjr  set->elems[0] = elem;
990146040Stjr  return REG_NOERROR;
991146040Stjr}
992146040Stjr
993146040Stjrstatic reg_errcode_t
994250724Sjkiminternal_function __attribute_warn_unused_result__
995250724Sjkimre_node_set_init_2 (re_node_set *set, int elem1, int elem2)
996146040Stjr{
997146040Stjr  set->alloc = 2;
998146040Stjr  set->elems = re_malloc (int, 2);
999146040Stjr  if (BE (set->elems == NULL, 0))
1000146040Stjr    return REG_ESPACE;
1001146040Stjr  if (elem1 == elem2)
1002146040Stjr    {
1003146040Stjr      set->nelem = 1;
1004146040Stjr      set->elems[0] = elem1;
1005146040Stjr    }
1006146040Stjr  else
1007146040Stjr    {
1008146040Stjr      set->nelem = 2;
1009146040Stjr      if (elem1 < elem2)
1010146040Stjr	{
1011146040Stjr	  set->elems[0] = elem1;
1012146040Stjr	  set->elems[1] = elem2;
1013146040Stjr	}
1014146040Stjr      else
1015146040Stjr	{
1016146040Stjr	  set->elems[0] = elem2;
1017146040Stjr	  set->elems[1] = elem1;
1018146040Stjr	}
1019146040Stjr    }
1020146040Stjr  return REG_NOERROR;
1021146040Stjr}
1022146040Stjr
1023146040Stjrstatic reg_errcode_t
1024250724Sjkiminternal_function __attribute_warn_unused_result__
1025250724Sjkimre_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1026146040Stjr{
1027146040Stjr  dest->nelem = src->nelem;
1028146040Stjr  if (src->nelem > 0)
1029146040Stjr    {
1030146040Stjr      dest->alloc = dest->nelem;
1031146040Stjr      dest->elems = re_malloc (int, dest->alloc);
1032146040Stjr      if (BE (dest->elems == NULL, 0))
1033146040Stjr	{
1034146040Stjr	  dest->alloc = dest->nelem = 0;
1035146040Stjr	  return REG_ESPACE;
1036146040Stjr	}
1037146040Stjr      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1038146040Stjr    }
1039146040Stjr  else
1040146040Stjr    re_node_set_init_empty (dest);
1041146040Stjr  return REG_NOERROR;
1042146040Stjr}
1043146040Stjr
1044146040Stjr/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1045146040Stjr   DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1046146040Stjr   Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
1047146040Stjr
1048146040Stjrstatic reg_errcode_t
1049250724Sjkiminternal_function __attribute_warn_unused_result__
1050250724Sjkimre_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1051250724Sjkim			   const re_node_set *src2)
1052146040Stjr{
1053146040Stjr  int i1, i2, is, id, delta, sbase;
1054146040Stjr  if (src1->nelem == 0 || src2->nelem == 0)
1055146040Stjr    return REG_NOERROR;
1056146040Stjr
1057146040Stjr  /* We need dest->nelem + 2 * elems_in_intersection; this is a
1058146040Stjr     conservative estimate.  */
1059146040Stjr  if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1060146040Stjr    {
1061146040Stjr      int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1062146040Stjr      int *new_elems = re_realloc (dest->elems, int, new_alloc);
1063146040Stjr      if (BE (new_elems == NULL, 0))
1064250724Sjkim	return REG_ESPACE;
1065146040Stjr      dest->elems = new_elems;
1066146040Stjr      dest->alloc = new_alloc;
1067146040Stjr    }
1068146040Stjr
1069146040Stjr  /* Find the items in the intersection of SRC1 and SRC2, and copy
1070146040Stjr     into the top of DEST those that are not already in DEST itself.  */
1071146040Stjr  sbase = dest->nelem + src1->nelem + src2->nelem;
1072146040Stjr  i1 = src1->nelem - 1;
1073146040Stjr  i2 = src2->nelem - 1;
1074146040Stjr  id = dest->nelem - 1;
1075146040Stjr  for (;;)
1076146040Stjr    {
1077146040Stjr      if (src1->elems[i1] == src2->elems[i2])
1078146040Stjr	{
1079146040Stjr	  /* Try to find the item in DEST.  Maybe we could binary search?  */
1080146040Stjr	  while (id >= 0 && dest->elems[id] > src1->elems[i1])
1081146040Stjr	    --id;
1082146040Stjr
1083250724Sjkim	  if (id < 0 || dest->elems[id] != src1->elems[i1])
1084250724Sjkim	    dest->elems[--sbase] = src1->elems[i1];
1085146040Stjr
1086146040Stjr	  if (--i1 < 0 || --i2 < 0)
1087146040Stjr	    break;
1088146040Stjr	}
1089146040Stjr
1090146040Stjr      /* Lower the highest of the two items.  */
1091146040Stjr      else if (src1->elems[i1] < src2->elems[i2])
1092146040Stjr	{
1093146040Stjr	  if (--i2 < 0)
1094146040Stjr	    break;
1095146040Stjr	}
1096146040Stjr      else
1097146040Stjr	{
1098146040Stjr	  if (--i1 < 0)
1099146040Stjr	    break;
1100146040Stjr	}
1101146040Stjr    }
1102146040Stjr
1103146040Stjr  id = dest->nelem - 1;
1104146040Stjr  is = dest->nelem + src1->nelem + src2->nelem - 1;
1105146040Stjr  delta = is - sbase + 1;
1106146040Stjr
1107146040Stjr  /* Now copy.  When DELTA becomes zero, the remaining
1108146040Stjr     DEST elements are already in place; this is more or
1109146040Stjr     less the same loop that is in re_node_set_merge.  */
1110146040Stjr  dest->nelem += delta;
1111146040Stjr  if (delta > 0 && id >= 0)
1112146040Stjr    for (;;)
1113146040Stjr      {
1114250724Sjkim	if (dest->elems[is] > dest->elems[id])
1115250724Sjkim	  {
1116250724Sjkim	    /* Copy from the top.  */
1117250724Sjkim	    dest->elems[id + delta--] = dest->elems[is--];
1118250724Sjkim	    if (delta == 0)
1119250724Sjkim	      break;
1120250724Sjkim	  }
1121250724Sjkim	else
1122250724Sjkim	  {
1123250724Sjkim	    /* Slide from the bottom.  */
1124250724Sjkim	    dest->elems[id + delta] = dest->elems[id];
1125250724Sjkim	    if (--id < 0)
1126250724Sjkim	      break;
1127250724Sjkim	  }
1128146040Stjr      }
1129146040Stjr
1130146040Stjr  /* Copy remaining SRC elements.  */
1131146040Stjr  memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int));
1132146040Stjr
1133146040Stjr  return REG_NOERROR;
1134146040Stjr}
1135146040Stjr
1136146040Stjr/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1137146040Stjr   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1138146040Stjr
1139146040Stjrstatic reg_errcode_t
1140250724Sjkiminternal_function __attribute_warn_unused_result__
1141250724Sjkimre_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1142250724Sjkim			const re_node_set *src2)
1143146040Stjr{
1144146040Stjr  int i1, i2, id;
1145146040Stjr  if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1146146040Stjr    {
1147146040Stjr      dest->alloc = src1->nelem + src2->nelem;
1148146040Stjr      dest->elems = re_malloc (int, dest->alloc);
1149146040Stjr      if (BE (dest->elems == NULL, 0))
1150146040Stjr	return REG_ESPACE;
1151146040Stjr    }
1152146040Stjr  else
1153146040Stjr    {
1154146040Stjr      if (src1 != NULL && src1->nelem > 0)
1155146040Stjr	return re_node_set_init_copy (dest, src1);
1156146040Stjr      else if (src2 != NULL && src2->nelem > 0)
1157146040Stjr	return re_node_set_init_copy (dest, src2);
1158146040Stjr      else
1159146040Stjr	re_node_set_init_empty (dest);
1160146040Stjr      return REG_NOERROR;
1161146040Stjr    }
1162146040Stjr  for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1163146040Stjr    {
1164146040Stjr      if (src1->elems[i1] > src2->elems[i2])
1165146040Stjr	{
1166146040Stjr	  dest->elems[id++] = src2->elems[i2++];
1167146040Stjr	  continue;
1168146040Stjr	}
1169146040Stjr      if (src1->elems[i1] == src2->elems[i2])
1170146040Stjr	++i2;
1171146040Stjr      dest->elems[id++] = src1->elems[i1++];
1172146040Stjr    }
1173146040Stjr  if (i1 < src1->nelem)
1174146040Stjr    {
1175146040Stjr      memcpy (dest->elems + id, src1->elems + i1,
1176146040Stjr	     (src1->nelem - i1) * sizeof (int));
1177146040Stjr      id += src1->nelem - i1;
1178146040Stjr    }
1179146040Stjr  else if (i2 < src2->nelem)
1180146040Stjr    {
1181146040Stjr      memcpy (dest->elems + id, src2->elems + i2,
1182146040Stjr	     (src2->nelem - i2) * sizeof (int));
1183146040Stjr      id += src2->nelem - i2;
1184146040Stjr    }
1185146040Stjr  dest->nelem = id;
1186146040Stjr  return REG_NOERROR;
1187146040Stjr}
1188146040Stjr
1189146040Stjr/* Calculate the union set of the sets DEST and SRC. And store it to
1190146040Stjr   DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1191146040Stjr
1192146040Stjrstatic reg_errcode_t
1193250724Sjkiminternal_function __attribute_warn_unused_result__
1194250724Sjkimre_node_set_merge (re_node_set *dest, const re_node_set *src)
1195146040Stjr{
1196146040Stjr  int is, id, sbase, delta;
1197146040Stjr  if (src == NULL || src->nelem == 0)
1198146040Stjr    return REG_NOERROR;
1199146040Stjr  if (dest->alloc < 2 * src->nelem + dest->nelem)
1200146040Stjr    {
1201146040Stjr      int new_alloc = 2 * (src->nelem + dest->alloc);
1202146040Stjr      int *new_buffer = re_realloc (dest->elems, int, new_alloc);
1203146040Stjr      if (BE (new_buffer == NULL, 0))
1204146040Stjr	return REG_ESPACE;
1205146040Stjr      dest->elems = new_buffer;
1206146040Stjr      dest->alloc = new_alloc;
1207146040Stjr    }
1208146040Stjr
1209146040Stjr  if (BE (dest->nelem == 0, 0))
1210146040Stjr    {
1211146040Stjr      dest->nelem = src->nelem;
1212146040Stjr      memcpy (dest->elems, src->elems, src->nelem * sizeof (int));
1213146040Stjr      return REG_NOERROR;
1214146040Stjr    }
1215146040Stjr
1216146040Stjr  /* Copy into the top of DEST the items of SRC that are not
1217146040Stjr     found in DEST.  Maybe we could binary search in DEST?  */
1218146040Stjr  for (sbase = dest->nelem + 2 * src->nelem,
1219146040Stjr       is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1220146040Stjr    {
1221146040Stjr      if (dest->elems[id] == src->elems[is])
1222250724Sjkim	is--, id--;
1223146040Stjr      else if (dest->elems[id] < src->elems[is])
1224250724Sjkim	dest->elems[--sbase] = src->elems[is--];
1225146040Stjr      else /* if (dest->elems[id] > src->elems[is]) */
1226250724Sjkim	--id;
1227146040Stjr    }
1228146040Stjr
1229146040Stjr  if (is >= 0)
1230146040Stjr    {
1231146040Stjr      /* If DEST is exhausted, the remaining items of SRC must be unique.  */
1232146040Stjr      sbase -= is + 1;
1233146040Stjr      memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int));
1234146040Stjr    }
1235146040Stjr
1236146040Stjr  id = dest->nelem - 1;
1237146040Stjr  is = dest->nelem + 2 * src->nelem - 1;
1238146040Stjr  delta = is - sbase + 1;
1239146040Stjr  if (delta == 0)
1240146040Stjr    return REG_NOERROR;
1241146040Stjr
1242146040Stjr  /* Now copy.  When DELTA becomes zero, the remaining
1243146040Stjr     DEST elements are already in place.  */
1244146040Stjr  dest->nelem += delta;
1245146040Stjr  for (;;)
1246146040Stjr    {
1247146040Stjr      if (dest->elems[is] > dest->elems[id])
1248250724Sjkim	{
1249146040Stjr	  /* Copy from the top.  */
1250250724Sjkim	  dest->elems[id + delta--] = dest->elems[is--];
1251146040Stjr	  if (delta == 0)
1252146040Stjr	    break;
1253146040Stjr	}
1254146040Stjr      else
1255250724Sjkim	{
1256250724Sjkim	  /* Slide from the bottom.  */
1257250724Sjkim	  dest->elems[id + delta] = dest->elems[id];
1258146040Stjr	  if (--id < 0)
1259146040Stjr	    {
1260146040Stjr	      /* Copy remaining SRC elements.  */
1261146040Stjr	      memcpy (dest->elems, dest->elems + sbase,
1262250724Sjkim		      delta * sizeof (int));
1263146040Stjr	      break;
1264146040Stjr	    }
1265146040Stjr	}
1266146040Stjr    }
1267146040Stjr
1268146040Stjr  return REG_NOERROR;
1269146040Stjr}
1270146040Stjr
1271146040Stjr/* Insert the new element ELEM to the re_node_set* SET.
1272146040Stjr   SET should not already have ELEM.
1273146040Stjr   return -1 if an error is occured, return 1 otherwise.  */
1274146040Stjr
1275146040Stjrstatic int
1276250724Sjkiminternal_function __attribute_warn_unused_result__
1277250724Sjkimre_node_set_insert (re_node_set *set, int elem)
1278146040Stjr{
1279146040Stjr  int idx;
1280146040Stjr  /* In case the set is empty.  */
1281146040Stjr  if (set->alloc == 0)
1282146040Stjr    {
1283146040Stjr      if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
1284146040Stjr	return 1;
1285146040Stjr      else
1286146040Stjr	return -1;
1287146040Stjr    }
1288146040Stjr
1289146040Stjr  if (BE (set->nelem, 0) == 0)
1290146040Stjr    {
1291146040Stjr      /* We already guaranteed above that set->alloc != 0.  */
1292146040Stjr      set->elems[0] = elem;
1293146040Stjr      ++set->nelem;
1294146040Stjr      return 1;
1295146040Stjr    }
1296146040Stjr
1297146040Stjr  /* Realloc if we need.  */
1298146040Stjr  if (set->alloc == set->nelem)
1299146040Stjr    {
1300250724Sjkim      int *new_elems;
1301146040Stjr      set->alloc = set->alloc * 2;
1302250724Sjkim      new_elems = re_realloc (set->elems, int, set->alloc);
1303250724Sjkim      if (BE (new_elems == NULL, 0))
1304146040Stjr	return -1;
1305250724Sjkim      set->elems = new_elems;
1306146040Stjr    }
1307146040Stjr
1308146040Stjr  /* Move the elements which follows the new element.  Test the
1309146040Stjr     first element separately to skip a check in the inner loop.  */
1310146040Stjr  if (elem < set->elems[0])
1311146040Stjr    {
1312146040Stjr      idx = 0;
1313146040Stjr      for (idx = set->nelem; idx > 0; idx--)
1314250724Sjkim	set->elems[idx] = set->elems[idx - 1];
1315146040Stjr    }
1316146040Stjr  else
1317146040Stjr    {
1318146040Stjr      for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1319250724Sjkim	set->elems[idx] = set->elems[idx - 1];
1320146040Stjr    }
1321146040Stjr
1322146040Stjr  /* Insert the new element.  */
1323146040Stjr  set->elems[idx] = elem;
1324146040Stjr  ++set->nelem;
1325146040Stjr  return 1;
1326146040Stjr}
1327146040Stjr
1328146040Stjr/* Insert the new element ELEM to the re_node_set* SET.
1329146040Stjr   SET should not already have any element greater than or equal to ELEM.
1330146040Stjr   Return -1 if an error is occured, return 1 otherwise.  */
1331146040Stjr
1332146040Stjrstatic int
1333250724Sjkiminternal_function __attribute_warn_unused_result__
1334250724Sjkimre_node_set_insert_last (re_node_set *set, int elem)
1335146040Stjr{
1336146040Stjr  /* Realloc if we need.  */
1337146040Stjr  if (set->alloc == set->nelem)
1338146040Stjr    {
1339250724Sjkim      int *new_elems;
1340146040Stjr      set->alloc = (set->alloc + 1) * 2;
1341250724Sjkim      new_elems = re_realloc (set->elems, int, set->alloc);
1342250724Sjkim      if (BE (new_elems == NULL, 0))
1343146040Stjr	return -1;
1344250724Sjkim      set->elems = new_elems;
1345146040Stjr    }
1346146040Stjr
1347146040Stjr  /* Insert the new element.  */
1348146040Stjr  set->elems[set->nelem++] = elem;
1349146040Stjr  return 1;
1350146040Stjr}
1351146040Stjr
1352146040Stjr/* Compare two node sets SET1 and SET2.
1353146040Stjr   return 1 if SET1 and SET2 are equivalent, return 0 otherwise.  */
1354146040Stjr
1355146040Stjrstatic int
1356250724Sjkiminternal_function __attribute ((pure))
1357250724Sjkimre_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1358146040Stjr{
1359146040Stjr  int i;
1360146040Stjr  if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1361146040Stjr    return 0;
1362146040Stjr  for (i = set1->nelem ; --i >= 0 ; )
1363146040Stjr    if (set1->elems[i] != set2->elems[i])
1364146040Stjr      return 0;
1365146040Stjr  return 1;
1366146040Stjr}
1367146040Stjr
1368146040Stjr/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
1369146040Stjr
1370146040Stjrstatic int
1371250724Sjkiminternal_function __attribute ((pure))
1372250724Sjkimre_node_set_contains (const re_node_set *set, int elem)
1373146040Stjr{
1374146040Stjr  unsigned int idx, right, mid;
1375146040Stjr  if (set->nelem <= 0)
1376146040Stjr    return 0;
1377146040Stjr
1378146040Stjr  /* Binary search the element.  */
1379146040Stjr  idx = 0;
1380146040Stjr  right = set->nelem - 1;
1381146040Stjr  while (idx < right)
1382146040Stjr    {
1383146040Stjr      mid = (idx + right) / 2;
1384146040Stjr      if (set->elems[mid] < elem)
1385146040Stjr	idx = mid + 1;
1386146040Stjr      else
1387146040Stjr	right = mid;
1388146040Stjr    }
1389146040Stjr  return set->elems[idx] == elem ? idx + 1 : 0;
1390146040Stjr}
1391146040Stjr
1392146040Stjrstatic void
1393250724Sjkiminternal_function
1394250724Sjkimre_node_set_remove_at (re_node_set *set, int idx)
1395146040Stjr{
1396146040Stjr  if (idx < 0 || idx >= set->nelem)
1397146040Stjr    return;
1398146040Stjr  --set->nelem;
1399146040Stjr  for (; idx < set->nelem; idx++)
1400146040Stjr    set->elems[idx] = set->elems[idx + 1];
1401146040Stjr}
1402146040Stjr
1403146040Stjr
1404146040Stjr/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1405146040Stjr   Or return -1, if an error will be occured.  */
1406146040Stjr
1407146040Stjrstatic int
1408250724Sjkiminternal_function
1409250724Sjkimre_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1410146040Stjr{
1411146040Stjr  int type = token.type;
1412146040Stjr  if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1413146040Stjr    {
1414250724Sjkim      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1415146040Stjr      int *new_nexts, *new_indices;
1416146040Stjr      re_node_set *new_edests, *new_eclosures;
1417250724Sjkim      re_token_t *new_nodes;
1418146040Stjr
1419250724Sjkim      /* Avoid overflows in realloc.  */
1420250724Sjkim      const size_t max_object_size = MAX (sizeof (re_token_t),
1421250724Sjkim					  MAX (sizeof (re_node_set),
1422250724Sjkim					       sizeof (int)));
1423250724Sjkim      if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
1424146040Stjr	return -1;
1425250724Sjkim
1426250724Sjkim      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1427250724Sjkim      if (BE (new_nodes == NULL, 0))
1428250724Sjkim	return -1;
1429250724Sjkim      dfa->nodes = new_nodes;
1430146040Stjr      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
1431146040Stjr      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
1432146040Stjr      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1433146040Stjr      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1434146040Stjr      if (BE (new_nexts == NULL || new_indices == NULL
1435146040Stjr	      || new_edests == NULL || new_eclosures == NULL, 0))
1436146040Stjr	return -1;
1437146040Stjr      dfa->nexts = new_nexts;
1438146040Stjr      dfa->org_indices = new_indices;
1439146040Stjr      dfa->edests = new_edests;
1440146040Stjr      dfa->eclosures = new_eclosures;
1441146040Stjr      dfa->nodes_alloc = new_nodes_alloc;
1442146040Stjr    }
1443146040Stjr  dfa->nodes[dfa->nodes_len] = token;
1444146040Stjr  dfa->nodes[dfa->nodes_len].constraint = 0;
1445146040Stjr#ifdef RE_ENABLE_I18N
1446146040Stjr  dfa->nodes[dfa->nodes_len].accept_mb =
1447146040Stjr    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1448146040Stjr#endif
1449146040Stjr  dfa->nexts[dfa->nodes_len] = -1;
1450146040Stjr  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1451146040Stjr  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1452146040Stjr  return dfa->nodes_len++;
1453146040Stjr}
1454146040Stjr
1455250724Sjkimstatic inline unsigned int
1456250724Sjkiminternal_function
1457250724Sjkimcalc_state_hash (const re_node_set *nodes, unsigned int context)
1458146040Stjr{
1459146040Stjr  unsigned int hash = nodes->nelem + context;
1460146040Stjr  int i;
1461146040Stjr  for (i = 0 ; i < nodes->nelem ; i++)
1462146040Stjr    hash += nodes->elems[i];
1463146040Stjr  return hash;
1464146040Stjr}
1465146040Stjr
1466146040Stjr/* Search for the state whose node_set is equivalent to NODES.
1467146040Stjr   Return the pointer to the state, if we found it in the DFA.
1468146040Stjr   Otherwise create the new one and return it.  In case of an error
1469146040Stjr   return NULL and set the error code in ERR.
1470146040Stjr   Note: - We assume NULL as the invalid state, then it is possible that
1471146040Stjr	   return value is NULL and ERR is REG_NOERROR.
1472146040Stjr	 - We never return non-NULL value in case of any errors, it is for
1473146040Stjr	   optimization.  */
1474146040Stjr
1475250724Sjkimstatic re_dfastate_t *
1476250724Sjkiminternal_function __attribute_warn_unused_result__
1477250724Sjkimre_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1478250724Sjkim		  const re_node_set *nodes)
1479146040Stjr{
1480146040Stjr  unsigned int hash;
1481146040Stjr  re_dfastate_t *new_state;
1482146040Stjr  struct re_state_table_entry *spot;
1483146040Stjr  int i;
1484146040Stjr  if (BE (nodes->nelem == 0, 0))
1485146040Stjr    {
1486146040Stjr      *err = REG_NOERROR;
1487146040Stjr      return NULL;
1488146040Stjr    }
1489146040Stjr  hash = calc_state_hash (nodes, 0);
1490146040Stjr  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1491146040Stjr
1492146040Stjr  for (i = 0 ; i < spot->num ; i++)
1493146040Stjr    {
1494146040Stjr      re_dfastate_t *state = spot->array[i];
1495146040Stjr      if (hash != state->hash)
1496146040Stjr	continue;
1497146040Stjr      if (re_node_set_compare (&state->nodes, nodes))
1498146040Stjr	return state;
1499146040Stjr    }
1500146040Stjr
1501146040Stjr  /* There are no appropriate state in the dfa, create the new one.  */
1502146040Stjr  new_state = create_ci_newstate (dfa, nodes, hash);
1503250724Sjkim  if (BE (new_state == NULL, 0))
1504250724Sjkim    *err = REG_ESPACE;
1505250724Sjkim
1506250724Sjkim  return new_state;
1507146040Stjr}
1508146040Stjr
1509146040Stjr/* Search for the state whose node_set is equivalent to NODES and
1510146040Stjr   whose context is equivalent to CONTEXT.
1511146040Stjr   Return the pointer to the state, if we found it in the DFA.
1512146040Stjr   Otherwise create the new one and return it.  In case of an error
1513146040Stjr   return NULL and set the error code in ERR.
1514146040Stjr   Note: - We assume NULL as the invalid state, then it is possible that
1515146040Stjr	   return value is NULL and ERR is REG_NOERROR.
1516146040Stjr	 - We never return non-NULL value in case of any errors, it is for
1517146040Stjr	   optimization.  */
1518146040Stjr
1519250724Sjkimstatic re_dfastate_t *
1520250724Sjkiminternal_function __attribute_warn_unused_result__
1521250724Sjkimre_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1522250724Sjkim			  const re_node_set *nodes, unsigned int context)
1523146040Stjr{
1524146040Stjr  unsigned int hash;
1525146040Stjr  re_dfastate_t *new_state;
1526146040Stjr  struct re_state_table_entry *spot;
1527146040Stjr  int i;
1528146040Stjr  if (nodes->nelem == 0)
1529146040Stjr    {
1530146040Stjr      *err = REG_NOERROR;
1531146040Stjr      return NULL;
1532146040Stjr    }
1533146040Stjr  hash = calc_state_hash (nodes, context);
1534146040Stjr  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1535146040Stjr
1536146040Stjr  for (i = 0 ; i < spot->num ; i++)
1537146040Stjr    {
1538146040Stjr      re_dfastate_t *state = spot->array[i];
1539146040Stjr      if (state->hash == hash
1540146040Stjr	  && state->context == context
1541146040Stjr	  && re_node_set_compare (state->entrance_nodes, nodes))
1542146040Stjr	return state;
1543146040Stjr    }
1544146040Stjr  /* There are no appropriate state in `dfa', create the new one.  */
1545146040Stjr  new_state = create_cd_newstate (dfa, nodes, context, hash);
1546250724Sjkim  if (BE (new_state == NULL, 0))
1547250724Sjkim    *err = REG_ESPACE;
1548250724Sjkim
1549250724Sjkim  return new_state;
1550146040Stjr}
1551146040Stjr
1552146040Stjr/* Finish initialization of the new state NEWSTATE, and using its hash value
1553146040Stjr   HASH put in the appropriate bucket of DFA's state table.  Return value
1554146040Stjr   indicates the error code if failed.  */
1555146040Stjr
1556146040Stjrstatic reg_errcode_t
1557250724Sjkim__attribute_warn_unused_result__
1558250724Sjkimregister_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1559250724Sjkim		unsigned int hash)
1560146040Stjr{
1561146040Stjr  struct re_state_table_entry *spot;
1562146040Stjr  reg_errcode_t err;
1563146040Stjr  int i;
1564146040Stjr
1565146040Stjr  newstate->hash = hash;
1566146040Stjr  err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1567146040Stjr  if (BE (err != REG_NOERROR, 0))
1568146040Stjr    return REG_ESPACE;
1569146040Stjr  for (i = 0; i < newstate->nodes.nelem; i++)
1570146040Stjr    {
1571146040Stjr      int elem = newstate->nodes.elems[i];
1572146040Stjr      if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1573250724Sjkim	if (re_node_set_insert_last (&newstate->non_eps_nodes, elem) < 0)
1574250724Sjkim	  return REG_ESPACE;
1575146040Stjr    }
1576146040Stjr
1577146040Stjr  spot = dfa->state_table + (hash & dfa->state_hash_mask);
1578146040Stjr  if (BE (spot->alloc <= spot->num, 0))
1579146040Stjr    {
1580146040Stjr      int new_alloc = 2 * spot->num + 2;
1581146040Stjr      re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1582146040Stjr					      new_alloc);
1583146040Stjr      if (BE (new_array == NULL, 0))
1584146040Stjr	return REG_ESPACE;
1585146040Stjr      spot->array = new_array;
1586146040Stjr      spot->alloc = new_alloc;
1587146040Stjr    }
1588146040Stjr  spot->array[spot->num++] = newstate;
1589146040Stjr  return REG_NOERROR;
1590146040Stjr}
1591146040Stjr
1592250724Sjkimstatic void
1593250724Sjkimfree_state (re_dfastate_t *state)
1594250724Sjkim{
1595250724Sjkim  re_node_set_free (&state->non_eps_nodes);
1596250724Sjkim  re_node_set_free (&state->inveclosure);
1597250724Sjkim  if (state->entrance_nodes != &state->nodes)
1598250724Sjkim    {
1599250724Sjkim      re_node_set_free (state->entrance_nodes);
1600250724Sjkim      re_free (state->entrance_nodes);
1601250724Sjkim    }
1602250724Sjkim  re_node_set_free (&state->nodes);
1603250724Sjkim  re_free (state->word_trtable);
1604250724Sjkim  re_free (state->trtable);
1605250724Sjkim  re_free (state);
1606250724Sjkim}
1607250724Sjkim
1608146040Stjr/* Create the new state which is independ of contexts.
1609146040Stjr   Return the new state if succeeded, otherwise return NULL.  */
1610146040Stjr
1611146040Stjrstatic re_dfastate_t *
1612250724Sjkiminternal_function __attribute_warn_unused_result__
1613250724Sjkimcreate_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1614250724Sjkim		    unsigned int hash)
1615146040Stjr{
1616146040Stjr  int i;
1617146040Stjr  reg_errcode_t err;
1618146040Stjr  re_dfastate_t *newstate;
1619146040Stjr
1620146040Stjr  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1621146040Stjr  if (BE (newstate == NULL, 0))
1622146040Stjr    return NULL;
1623146040Stjr  err = re_node_set_init_copy (&newstate->nodes, nodes);
1624146040Stjr  if (BE (err != REG_NOERROR, 0))
1625146040Stjr    {
1626146040Stjr      re_free (newstate);
1627146040Stjr      return NULL;
1628146040Stjr    }
1629146040Stjr
1630146040Stjr  newstate->entrance_nodes = &newstate->nodes;
1631146040Stjr  for (i = 0 ; i < nodes->nelem ; i++)
1632146040Stjr    {
1633146040Stjr      re_token_t *node = dfa->nodes + nodes->elems[i];
1634146040Stjr      re_token_type_t type = node->type;
1635146040Stjr      if (type == CHARACTER && !node->constraint)
1636146040Stjr	continue;
1637146040Stjr#ifdef RE_ENABLE_I18N
1638146040Stjr      newstate->accept_mb |= node->accept_mb;
1639146040Stjr#endif /* RE_ENABLE_I18N */
1640146040Stjr
1641146040Stjr      /* If the state has the halt node, the state is a halt state.  */
1642146040Stjr      if (type == END_OF_RE)
1643146040Stjr	newstate->halt = 1;
1644146040Stjr      else if (type == OP_BACK_REF)
1645146040Stjr	newstate->has_backref = 1;
1646146040Stjr      else if (type == ANCHOR || node->constraint)
1647146040Stjr	newstate->has_constraint = 1;
1648146040Stjr    }
1649146040Stjr  err = register_state (dfa, newstate, hash);
1650146040Stjr  if (BE (err != REG_NOERROR, 0))
1651146040Stjr    {
1652146040Stjr      free_state (newstate);
1653146040Stjr      newstate = NULL;
1654146040Stjr    }
1655146040Stjr  return newstate;
1656146040Stjr}
1657146040Stjr
1658146040Stjr/* Create the new state which is depend on the context CONTEXT.
1659146040Stjr   Return the new state if succeeded, otherwise return NULL.  */
1660146040Stjr
1661146040Stjrstatic re_dfastate_t *
1662250724Sjkiminternal_function __attribute_warn_unused_result__
1663250724Sjkimcreate_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1664250724Sjkim		    unsigned int context, unsigned int hash)
1665146040Stjr{
1666146040Stjr  int i, nctx_nodes = 0;
1667146040Stjr  reg_errcode_t err;
1668146040Stjr  re_dfastate_t *newstate;
1669146040Stjr
1670146040Stjr  newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1671146040Stjr  if (BE (newstate == NULL, 0))
1672146040Stjr    return NULL;
1673146040Stjr  err = re_node_set_init_copy (&newstate->nodes, nodes);
1674146040Stjr  if (BE (err != REG_NOERROR, 0))
1675146040Stjr    {
1676146040Stjr      re_free (newstate);
1677146040Stjr      return NULL;
1678146040Stjr    }
1679146040Stjr
1680146040Stjr  newstate->context = context;
1681146040Stjr  newstate->entrance_nodes = &newstate->nodes;
1682146040Stjr
1683146040Stjr  for (i = 0 ; i < nodes->nelem ; i++)
1684146040Stjr    {
1685146040Stjr      re_token_t *node = dfa->nodes + nodes->elems[i];
1686146040Stjr      re_token_type_t type = node->type;
1687250724Sjkim      unsigned int constraint = node->constraint;
1688146040Stjr
1689146040Stjr      if (type == CHARACTER && !constraint)
1690146040Stjr	continue;
1691146040Stjr#ifdef RE_ENABLE_I18N
1692146040Stjr      newstate->accept_mb |= node->accept_mb;
1693146040Stjr#endif /* RE_ENABLE_I18N */
1694146040Stjr
1695146040Stjr      /* If the state has the halt node, the state is a halt state.  */
1696146040Stjr      if (type == END_OF_RE)
1697146040Stjr	newstate->halt = 1;
1698146040Stjr      else if (type == OP_BACK_REF)
1699146040Stjr	newstate->has_backref = 1;
1700146040Stjr
1701146040Stjr      if (constraint)
1702146040Stjr	{
1703146040Stjr	  if (newstate->entrance_nodes == &newstate->nodes)
1704146040Stjr	    {
1705146040Stjr	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
1706146040Stjr	      if (BE (newstate->entrance_nodes == NULL, 0))
1707146040Stjr		{
1708146040Stjr		  free_state (newstate);
1709146040Stjr		  return NULL;
1710146040Stjr		}
1711250724Sjkim	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1712250724Sjkim		  != REG_NOERROR)
1713250724Sjkim		return NULL;
1714146040Stjr	      nctx_nodes = 0;
1715146040Stjr	      newstate->has_constraint = 1;
1716146040Stjr	    }
1717146040Stjr
1718146040Stjr	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1719146040Stjr	    {
1720146040Stjr	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1721146040Stjr	      ++nctx_nodes;
1722146040Stjr	    }
1723146040Stjr	}
1724146040Stjr    }
1725146040Stjr  err = register_state (dfa, newstate, hash);
1726146040Stjr  if (BE (err != REG_NOERROR, 0))
1727146040Stjr    {
1728146040Stjr      free_state (newstate);
1729146040Stjr      newstate = NULL;
1730146040Stjr    }
1731146040Stjr  return  newstate;
1732146040Stjr}
1733