1/*
2 * sssort.c for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27#include "divsufsort_private.h"
28
29
30/*- Private Functions -*/
31
32static const saint_t lg_table[256]= {
33 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
34  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
35  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
36  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
37  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
38  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
39  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
40  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
41};
42
43#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
44
45static INLINE
46saint_t
47ss_ilg(saidx_t n) {
48#if SS_BLOCKSIZE == 0
49# if defined(BUILD_DIVSUFSORT64)
50  return (n >> 32) ?
51          ((n >> 48) ?
52            ((n >> 56) ?
53              56 + lg_table[(n >> 56) & 0xff] :
54              48 + lg_table[(n >> 48) & 0xff]) :
55            ((n >> 40) ?
56              40 + lg_table[(n >> 40) & 0xff] :
57              32 + lg_table[(n >> 32) & 0xff])) :
58          ((n & 0xffff0000) ?
59            ((n & 0xff000000) ?
60              24 + lg_table[(n >> 24) & 0xff] :
61              16 + lg_table[(n >> 16) & 0xff]) :
62            ((n & 0x0000ff00) ?
63               8 + lg_table[(n >>  8) & 0xff] :
64               0 + lg_table[(n >>  0) & 0xff]));
65# else
66  return (n & 0xffff0000) ?
67          ((n & 0xff000000) ?
68            24 + lg_table[(n >> 24) & 0xff] :
69            16 + lg_table[(n >> 16) & 0xff]) :
70          ((n & 0x0000ff00) ?
71             8 + lg_table[(n >>  8) & 0xff] :
72             0 + lg_table[(n >>  0) & 0xff]);
73# endif
74#elif SS_BLOCKSIZE < 256
75  return lg_table[n];
76#else
77  return (n & 0xff00) ?
78          8 + lg_table[(n >> 8) & 0xff] :
79          0 + lg_table[(n >> 0) & 0xff];
80#endif
81}
82
83#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
84
85#if SS_BLOCKSIZE != 0
86
87static const saint_t sqq_table[256] = {
88  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
89 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
90 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
91110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
92128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
93143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
94156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
95169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
96181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
97192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
98202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
99212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
100221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
101230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
102239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
103247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
104};
105
106static INLINE
107saidx_t
108ss_isqrt(saidx_t x) {
109  saidx_t y, e;
110
111  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
112  e = (x & 0xffff0000) ?
113        ((x & 0xff000000) ?
114          24 + lg_table[(x >> 24) & 0xff] :
115          16 + lg_table[(x >> 16) & 0xff]) :
116        ((x & 0x0000ff00) ?
117           8 + lg_table[(x >>  8) & 0xff] :
118           0 + lg_table[(x >>  0) & 0xff]);
119
120  if(e >= 16) {
121    y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
122    if(e >= 24) { y = (y + 1 + x / y) >> 1; }
123    y = (y + 1 + x / y) >> 1;
124  } else if(e >= 8) {
125    y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
126  } else {
127    return sqq_table[x] >> 4;
128  }
129
130  return (x < (y * y)) ? y - 1 : y;
131}
132
133#endif /* SS_BLOCKSIZE != 0 */
134
135
136/*---------------------------------------------------------------------------*/
137
138/* Compares two suffixes. */
139static INLINE
140saint_t
141ss_compare(const sauchar_t *T,
142           const saidx_t *p1, const saidx_t *p2,
143           saidx_t depth) {
144  const sauchar_t *U1, *U2, *U1n, *U2n;
145
146  for(U1 = T + depth + *p1,
147      U2 = T + depth + *p2,
148      U1n = T + *(p1 + 1) + 2,
149      U2n = T + *(p2 + 1) + 2;
150      (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
151      ++U1, ++U2) {
152  }
153
154  return U1 < U1n ?
155        (U2 < U2n ? *U1 - *U2 : 1) :
156        (U2 < U2n ? -1 : 0);
157}
158
159
160/*---------------------------------------------------------------------------*/
161
162#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
163
164/* Insertionsort for small size groups */
165static
166void
167ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
168                 saidx_t *first, saidx_t *last, saidx_t depth) {
169  saidx_t *i, *j;
170  saidx_t t;
171  saint_t r;
172
173  for(i = last - 2; first <= i; --i) {
174    for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
175      do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
176      if(last <= j) { break; }
177    }
178    if(r == 0) { *j = ~*j; }
179    *(j - 1) = t;
180  }
181}
182
183#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
184
185
186/*---------------------------------------------------------------------------*/
187
188#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
189
190static INLINE
191void
192ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
193           saidx_t *SA, saidx_t i, saidx_t size) {
194  saidx_t j, k;
195  saidx_t v;
196  saint_t c, d, e;
197
198  for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
199    d = Td[PA[SA[k = j++]]];
200    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
201    if(d <= c) { break; }
202  }
203  SA[i] = v;
204}
205
206/* Simple top-down heapsort. */
207static
208void
209ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
210  saidx_t i, m;
211  saidx_t t;
212
213  m = size;
214  if((size % 2) == 0) {
215    m--;
216    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
217  }
218
219  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
220  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
221  for(i = m - 1; 0 < i; --i) {
222    t = SA[0], SA[0] = SA[i];
223    ss_fixdown(Td, PA, SA, 0, i);
224    SA[i] = t;
225  }
226}
227
228
229/*---------------------------------------------------------------------------*/
230
231/* Returns the median of three elements. */
232static INLINE
233saidx_t *
234ss_median3(const sauchar_t *Td, const saidx_t *PA,
235           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
236  saidx_t *t;
237  if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
238  if(Td[PA[*v2]] > Td[PA[*v3]]) {
239    if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
240    else { return v3; }
241  }
242  return v2;
243}
244
245/* Returns the median of five elements. */
246static INLINE
247saidx_t *
248ss_median5(const sauchar_t *Td, const saidx_t *PA,
249           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
250  saidx_t *t;
251  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
252  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
253  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
254  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
255  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
256  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
257  return v3;
258}
259
260/* Returns the pivot element. */
261static INLINE
262saidx_t *
263ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
264  saidx_t *middle;
265  saidx_t t;
266
267  t = last - first;
268  middle = first + t / 2;
269
270  if(t <= 512) {
271    if(t <= 32) {
272      return ss_median3(Td, PA, first, middle, last - 1);
273    } else {
274      t >>= 2;
275      return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
276    }
277  }
278  t >>= 3;
279  first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
280  middle = ss_median3(Td, PA, middle - t, middle, middle + t);
281  last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
282  return ss_median3(Td, PA, first, middle, last);
283}
284
285
286/*---------------------------------------------------------------------------*/
287
288/* Binary partition for substrings. */
289static INLINE
290saidx_t *
291ss_partition(const saidx_t *PA,
292                    saidx_t *first, saidx_t *last, saidx_t depth) {
293  saidx_t *a, *b;
294  saidx_t t;
295  for(a = first - 1, b = last;;) {
296    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
297    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
298    if(b <= a) { break; }
299    t = ~*b;
300    *b = *a;
301    *a = t;
302  }
303  if(first < a) { *first = ~*first; }
304  return a;
305}
306
307/* Multikey introsort for medium size groups. */
308static
309void
310ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
311              saidx_t *first, saidx_t *last,
312              saidx_t depth) {
313#define STACK_SIZE SS_MISORT_STACKSIZE
314  struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
315  const sauchar_t *Td;
316  saidx_t *a, *b, *c, *d, *e, *f;
317  saidx_t s, t;
318  saint_t ssize;
319  saint_t limit;
320  saint_t v, x = 0;
321
322  for(ssize = 0, limit = ss_ilg(last - first);;) {
323
324    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
325#if 1 < SS_INSERTIONSORT_THRESHOLD
326      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
327#endif
328      STACK_POP(first, last, depth, limit);
329      continue;
330    }
331
332    Td = T + depth;
333    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
334    if(limit < 0) {
335      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
336        if((x = Td[PA[*a]]) != v) {
337          if(1 < (a - first)) { break; }
338          v = x;
339          first = a;
340        }
341      }
342      if(Td[PA[*first] - 1] < v) {
343        first = ss_partition(PA, first, a, depth);
344      }
345      if((a - first) <= (last - a)) {
346        if(1 < (a - first)) {
347          STACK_PUSH(a, last, depth, -1);
348          last = a, depth += 1, limit = ss_ilg(a - first);
349        } else {
350          first = a, limit = -1;
351        }
352      } else {
353        if(1 < (last - a)) {
354          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
355          first = a, limit = -1;
356        } else {
357          last = a, depth += 1, limit = ss_ilg(a - first);
358        }
359      }
360      continue;
361    }
362
363    /* choose pivot */
364    a = ss_pivot(Td, PA, first, last);
365    v = Td[PA[*a]];
366    SWAP(*first, *a);
367
368    /* partition */
369    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
370    if(((a = b) < last) && (x < v)) {
371      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
372        if(x == v) { SWAP(*b, *a); ++a; }
373      }
374    }
375    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
376    if((b < (d = c)) && (x > v)) {
377      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
378        if(x == v) { SWAP(*c, *d); --d; }
379      }
380    }
381    for(; b < c;) {
382      SWAP(*b, *c);
383      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
384        if(x == v) { SWAP(*b, *a); ++a; }
385      }
386      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
387        if(x == v) { SWAP(*c, *d); --d; }
388      }
389    }
390
391    if(a <= d) {
392      c = b - 1;
393
394      if((s = a - first) > (t = b - a)) { s = t; }
395      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
396      if((s = d - c) > (t = last - d - 1)) { s = t; }
397      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
398
399      a = first + (b - a), c = last - (d - c);
400      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
401
402      if((a - first) <= (last - c)) {
403        if((last - c) <= (c - b)) {
404          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
405          STACK_PUSH(c, last, depth, limit);
406          last = a;
407        } else if((a - first) <= (c - b)) {
408          STACK_PUSH(c, last, depth, limit);
409          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
410          last = a;
411        } else {
412          STACK_PUSH(c, last, depth, limit);
413          STACK_PUSH(first, a, depth, limit);
414          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
415        }
416      } else {
417        if((a - first) <= (c - b)) {
418          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
419          STACK_PUSH(first, a, depth, limit);
420          first = c;
421        } else if((last - c) <= (c - b)) {
422          STACK_PUSH(first, a, depth, limit);
423          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
424          first = c;
425        } else {
426          STACK_PUSH(first, a, depth, limit);
427          STACK_PUSH(c, last, depth, limit);
428          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
429        }
430      }
431    } else {
432      limit += 1;
433      if(Td[PA[*first] - 1] < v) {
434        first = ss_partition(PA, first, last, depth);
435        limit = ss_ilg(last - first);
436      }
437      depth += 1;
438    }
439  }
440#undef STACK_SIZE
441}
442
443#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
444
445
446/*---------------------------------------------------------------------------*/
447
448#if SS_BLOCKSIZE != 0
449
450static INLINE
451void
452ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
453  saidx_t t;
454  for(; 0 < n; --n, ++a, ++b) {
455    t = *a, *a = *b, *b = t;
456  }
457}
458
459static INLINE
460void
461ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
462  saidx_t *a, *b, t;
463  saidx_t l, r;
464  l = middle - first, r = last - middle;
465  for(; (0 < l) && (0 < r);) {
466    if(l == r) { ss_blockswap(first, middle, l); break; }
467    if(l < r) {
468      a = last - 1, b = middle - 1;
469      t = *a;
470      do {
471        *a-- = *b, *b-- = *a;
472        if(b < first) {
473          *a = t;
474          last = a;
475          if((r -= l + 1) <= l) { break; }
476          a -= 1, b = middle - 1;
477          t = *a;
478        }
479      } while(1);
480    } else {
481      a = first, b = middle;
482      t = *a;
483      do {
484        *a++ = *b, *b++ = *a;
485        if(last <= b) {
486          *a = t;
487          first = a + 1;
488          if((l -= r + 1) <= r) { break; }
489          a += 1, b = middle;
490          t = *a;
491        }
492      } while(1);
493    }
494  }
495}
496
497
498/*---------------------------------------------------------------------------*/
499
500static
501void
502ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
503                saidx_t *first, saidx_t *middle, saidx_t *last,
504                saidx_t depth) {
505  const saidx_t *p;
506  saidx_t *a, *b;
507  saidx_t len, half;
508  saint_t q, r;
509  saint_t x;
510
511  for(;;) {
512    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
513    else                { x = 0; p = PA +  *(last - 1); }
514    for(a = first, len = middle - first, half = len >> 1, r = -1;
515        0 < len;
516        len = half, half >>= 1) {
517      b = a + half;
518      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
519      if(q < 0) {
520        a = b + 1;
521        half -= (len & 1) ^ 1;
522      } else {
523        r = q;
524      }
525    }
526    if(a < middle) {
527      if(r == 0) { *a = ~*a; }
528      ss_rotate(a, middle, last);
529      last -= middle - a;
530      middle = a;
531      if(first == middle) { break; }
532    }
533    --last;
534    if(x != 0) { while(*--last < 0) { } }
535    if(middle == last) { break; }
536  }
537}
538
539
540/*---------------------------------------------------------------------------*/
541
542/* Merge-forward with internal buffer. */
543static
544void
545ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
546                saidx_t *first, saidx_t *middle, saidx_t *last,
547                saidx_t *buf, saidx_t depth) {
548  saidx_t *a, *b, *c, *bufend;
549  saidx_t t;
550  saint_t r;
551
552  bufend = buf + (middle - first) - 1;
553  ss_blockswap(buf, first, middle - first);
554
555  for(t = *(a = first), b = buf, c = middle;;) {
556    r = ss_compare(T, PA + *b, PA + *c, depth);
557    if(r < 0) {
558      do {
559        *a++ = *b;
560        if(bufend <= b) { *bufend = t; return; }
561        *b++ = *a;
562      } while(*b < 0);
563    } else if(r > 0) {
564      do {
565        *a++ = *c, *c++ = *a;
566        if(last <= c) {
567          while(b < bufend) { *a++ = *b, *b++ = *a; }
568          *a = *b, *b = t;
569          return;
570        }
571      } while(*c < 0);
572    } else {
573      *c = ~*c;
574      do {
575        *a++ = *b;
576        if(bufend <= b) { *bufend = t; return; }
577        *b++ = *a;
578      } while(*b < 0);
579
580      do {
581        *a++ = *c, *c++ = *a;
582        if(last <= c) {
583          while(b < bufend) { *a++ = *b, *b++ = *a; }
584          *a = *b, *b = t;
585          return;
586        }
587      } while(*c < 0);
588    }
589  }
590}
591
592/* Merge-backward with internal buffer. */
593static
594void
595ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
596                 saidx_t *first, saidx_t *middle, saidx_t *last,
597                 saidx_t *buf, saidx_t depth) {
598  const saidx_t *p1, *p2;
599  saidx_t *a, *b, *c, *bufend;
600  saidx_t t;
601  saint_t r;
602  saint_t x;
603
604  bufend = buf + (last - middle) - 1;
605  ss_blockswap(buf, middle, last - middle);
606
607  x = 0;
608  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
609  else                  { p1 = PA +  *bufend; }
610  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
611  else                  { p2 = PA +  *(middle - 1); }
612  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
613    r = ss_compare(T, p1, p2, depth);
614    if(0 < r) {
615      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
616      *a-- = *b;
617      if(b <= buf) { *buf = t; break; }
618      *b-- = *a;
619      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
620      else       { p1 = PA +  *b; }
621    } else if(r < 0) {
622      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
623      *a-- = *c, *c-- = *a;
624      if(c < first) {
625        while(buf < b) { *a-- = *b, *b-- = *a; }
626        *a = *b, *b = t;
627        break;
628      }
629      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
630      else       { p2 = PA +  *c; }
631    } else {
632      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
633      *a-- = ~*b;
634      if(b <= buf) { *buf = t; break; }
635      *b-- = *a;
636      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
637      *a-- = *c, *c-- = *a;
638      if(c < first) {
639        while(buf < b) { *a-- = *b, *b-- = *a; }
640        *a = *b, *b = t;
641        break;
642      }
643      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
644      else       { p1 = PA +  *b; }
645      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
646      else       { p2 = PA +  *c; }
647    }
648  }
649}
650
651/* D&C based merge. */
652static
653void
654ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
655             saidx_t *first, saidx_t *middle, saidx_t *last,
656             saidx_t *buf, saidx_t bufsize, saidx_t depth) {
657#define STACK_SIZE SS_SMERGE_STACKSIZE
658#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
659#define MERGE_CHECK(a, b, c)\
660  do {\
661    if(((c) & 1) ||\
662       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
663      *(a) = ~*(a);\
664    }\
665    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
666      *(b) = ~*(b);\
667    }\
668  } while(0)
669  struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
670  saidx_t *l, *r, *lm, *rm;
671  saidx_t m, len, half;
672  saint_t ssize;
673  saint_t check, next;
674
675  for(check = 0, ssize = 0;;) {
676    if((last - middle) <= bufsize) {
677      if((first < middle) && (middle < last)) {
678        ss_mergebackward(T, PA, first, middle, last, buf, depth);
679      }
680      MERGE_CHECK(first, last, check);
681      STACK_POP(first, middle, last, check);
682      continue;
683    }
684
685    if((middle - first) <= bufsize) {
686      if(first < middle) {
687        ss_mergeforward(T, PA, first, middle, last, buf, depth);
688      }
689      MERGE_CHECK(first, last, check);
690      STACK_POP(first, middle, last, check);
691      continue;
692    }
693
694    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
695        0 < len;
696        len = half, half >>= 1) {
697      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
698                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
699        m += half + 1;
700        half -= (len & 1) ^ 1;
701      }
702    }
703
704    if(0 < m) {
705      lm = middle - m, rm = middle + m;
706      ss_blockswap(lm, middle, m);
707      l = r = middle, next = 0;
708      if(rm < last) {
709        if(*rm < 0) {
710          *rm = ~*rm;
711          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
712          next |= 1;
713        } else if(first < lm) {
714          for(; *r < 0; ++r) { }
715          next |= 2;
716        }
717      }
718
719      if((l - first) <= (last - r)) {
720        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
721        middle = lm, last = l, check = (check & 3) | (next & 4);
722      } else {
723        if((next & 2) && (r == middle)) { next ^= 6; }
724        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
725        first = r, middle = rm, check = (next & 3) | (check & 4);
726      }
727    } else {
728      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
729        *middle = ~*middle;
730      }
731      MERGE_CHECK(first, last, check);
732      STACK_POP(first, middle, last, check);
733    }
734  }
735#undef STACK_SIZE
736}
737
738#endif /* SS_BLOCKSIZE != 0 */
739
740
741/*---------------------------------------------------------------------------*/
742
743/*- Function -*/
744
745/* Substring sort */
746void
747sssort(const sauchar_t *T, const saidx_t *PA,
748       saidx_t *first, saidx_t *last,
749       saidx_t *buf, saidx_t bufsize,
750       saidx_t depth, saidx_t n, saint_t lastsuffix) {
751  saidx_t *a;
752#if SS_BLOCKSIZE != 0
753  saidx_t *b, *middle, *curbuf;
754  saidx_t j, k, curbufsize, limit;
755#endif
756  saidx_t i;
757
758  if(lastsuffix != 0) { ++first; }
759
760#if SS_BLOCKSIZE == 0
761  ss_mintrosort(T, PA, first, last, depth);
762#else
763  if((bufsize < SS_BLOCKSIZE) &&
764      (bufsize < (last - first)) &&
765      (bufsize < (limit = ss_isqrt(last - first)))) {
766    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
767    buf = middle = last - limit, bufsize = limit;
768  } else {
769    middle = last, limit = 0;
770  }
771  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
772#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
773    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
774#elif 1 < SS_BLOCKSIZE
775    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
776#endif
777    curbufsize = last - (a + SS_BLOCKSIZE);
778    curbuf = a + SS_BLOCKSIZE;
779    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
780    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
781      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
782    }
783  }
784#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
785  ss_mintrosort(T, PA, a, middle, depth);
786#elif 1 < SS_BLOCKSIZE
787  ss_insertionsort(T, PA, a, middle, depth);
788#endif
789  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
790    if(i & 1) {
791      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
792      a -= k;
793    }
794  }
795  if(limit != 0) {
796#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
797    ss_mintrosort(T, PA, middle, last, depth);
798#elif 1 < SS_BLOCKSIZE
799    ss_insertionsort(T, PA, middle, last, depth);
800#endif
801    ss_inplacemerge(T, PA, first, middle, last, depth);
802  }
803#endif
804
805  if(lastsuffix != 0) {
806    /* Insert last type B* suffix. */
807    saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
808    for(a = first, i = *(first - 1);
809        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
810        ++a) {
811      *(a - 1) = *a;
812    }
813    *(a - 1) = i;
814  }
815}
816