1/*
2 * trsort.c for libdivsufsort
3 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person
6 * obtaining a copy of this software and associated documentation
7 * files (the "Software"), to deal in the Software without
8 * restriction, including without limitation the rights to use,
9 * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following
12 * conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 * OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27#include "divsufsort_private.h"
28
29
30/*- Private Functions -*/
31
32static const saint_t lg_table[256]= {
33 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
34  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
35  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
36  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
37  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
38  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
39  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
40  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
41};
42
43static INLINE
44saint_t
45tr_ilg(saidx_t n) {
46#if defined(BUILD_DIVSUFSORT64)
47  return (n >> 32) ?
48          ((n >> 48) ?
49            ((n >> 56) ?
50              56 + lg_table[(n >> 56) & 0xff] :
51              48 + lg_table[(n >> 48) & 0xff]) :
52            ((n >> 40) ?
53              40 + lg_table[(n >> 40) & 0xff] :
54              32 + lg_table[(n >> 32) & 0xff])) :
55          ((n & 0xffff0000) ?
56            ((n & 0xff000000) ?
57              24 + lg_table[(n >> 24) & 0xff] :
58              16 + lg_table[(n >> 16) & 0xff]) :
59            ((n & 0x0000ff00) ?
60               8 + lg_table[(n >>  8) & 0xff] :
61               0 + lg_table[(n >>  0) & 0xff]));
62#else
63  return (n & 0xffff0000) ?
64          ((n & 0xff000000) ?
65            24 + lg_table[(n >> 24) & 0xff] :
66            16 + lg_table[(n >> 16) & 0xff]) :
67          ((n & 0x0000ff00) ?
68             8 + lg_table[(n >>  8) & 0xff] :
69             0 + lg_table[(n >>  0) & 0xff]);
70#endif
71}
72
73
74/*---------------------------------------------------------------------------*/
75
76/* Simple insertionsort for small size groups. */
77static
78void
79tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
80  saidx_t *a, *b;
81  saidx_t t, r;
82
83  for(a = first + 1; a < last; ++a) {
84    for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
85      do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
86      if(b < first) { break; }
87    }
88    if(r == 0) { *b = ~*b; }
89    *(b + 1) = t;
90  }
91}
92
93
94/*---------------------------------------------------------------------------*/
95
96static INLINE
97void
98tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
99  saidx_t j, k;
100  saidx_t v;
101  saidx_t c, d, e;
102
103  for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
104    d = ISAd[SA[k = j++]];
105    if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
106    if(d <= c) { break; }
107  }
108  SA[i] = v;
109}
110
111/* Simple top-down heapsort. */
112static
113void
114tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
115  saidx_t i, m;
116  saidx_t t;
117
118  m = size;
119  if((size % 2) == 0) {
120    m--;
121    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
122  }
123
124  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
125  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
126  for(i = m - 1; 0 < i; --i) {
127    t = SA[0], SA[0] = SA[i];
128    tr_fixdown(ISAd, SA, 0, i);
129    SA[i] = t;
130  }
131}
132
133
134/*---------------------------------------------------------------------------*/
135
136/* Returns the median of three elements. */
137static INLINE
138saidx_t *
139tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
140  saidx_t *t;
141  if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
142  if(ISAd[*v2] > ISAd[*v3]) {
143    if(ISAd[*v1] > ISAd[*v3]) { return v1; }
144    else { return v3; }
145  }
146  return v2;
147}
148
149/* Returns the median of five elements. */
150static INLINE
151saidx_t *
152tr_median5(const saidx_t *ISAd,
153           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
154  saidx_t *t;
155  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
156  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
157  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
158  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
159  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
160  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
161  return v3;
162}
163
164/* Returns the pivot element. */
165static INLINE
166saidx_t *
167tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
168  saidx_t *middle;
169  saidx_t t;
170
171  t = last - first;
172  middle = first + t / 2;
173
174  if(t <= 512) {
175    if(t <= 32) {
176      return tr_median3(ISAd, first, middle, last - 1);
177    } else {
178      t >>= 2;
179      return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
180    }
181  }
182  t >>= 3;
183  first  = tr_median3(ISAd, first, first + t, first + (t << 1));
184  middle = tr_median3(ISAd, middle - t, middle, middle + t);
185  last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
186  return tr_median3(ISAd, first, middle, last);
187}
188
189
190/*---------------------------------------------------------------------------*/
191
192typedef struct _trbudget_t trbudget_t;
193struct _trbudget_t {
194  saidx_t chance;
195  saidx_t remain;
196  saidx_t incval;
197  saidx_t count;
198};
199
200static INLINE
201void
202trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
203  budget->chance = chance;
204  budget->remain = budget->incval = incval;
205}
206
207static INLINE
208saint_t
209trbudget_check(trbudget_t *budget, saidx_t size) {
210  if(size <= budget->remain) { budget->remain -= size; return 1; }
211  if(budget->chance == 0) { budget->count += size; return 0; }
212  budget->remain += budget->incval - size;
213  budget->chance -= 1;
214  return 1;
215}
216
217
218/*---------------------------------------------------------------------------*/
219
220static INLINE
221void
222tr_partition(const saidx_t *ISAd,
223             saidx_t *first, saidx_t *middle, saidx_t *last,
224             saidx_t **pa, saidx_t **pb, saidx_t v) {
225  saidx_t *a, *b, *c, *d, *e, *f;
226  saidx_t t, s;
227  saidx_t x = 0;
228
229  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
230  if(((a = b) < last) && (x < v)) {
231    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
232      if(x == v) { SWAP(*b, *a); ++a; }
233    }
234  }
235  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
236  if((b < (d = c)) && (x > v)) {
237    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
238      if(x == v) { SWAP(*c, *d); --d; }
239    }
240  }
241  for(; b < c;) {
242    SWAP(*b, *c);
243    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
244      if(x == v) { SWAP(*b, *a); ++a; }
245    }
246    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
247      if(x == v) { SWAP(*c, *d); --d; }
248    }
249  }
250
251  if(a <= d) {
252    c = b - 1;
253    if((s = a - first) > (t = b - a)) { s = t; }
254    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
255    if((s = d - c) > (t = last - d - 1)) { s = t; }
256    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
257    first += (b - a), last -= (d - c);
258  }
259  *pa = first, *pb = last;
260}
261
262static
263void
264tr_copy(saidx_t *ISA, const saidx_t *SA,
265        saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
266        saidx_t depth) {
267  /* sort suffixes of middle partition
268     by using sorted order of suffixes of left and right partition. */
269  saidx_t *c, *d, *e;
270  saidx_t s, v;
271
272  v = b - SA - 1;
273  for(c = first, d = a - 1; c <= d; ++c) {
274    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
275      *++d = s;
276      ISA[s] = d - SA;
277    }
278  }
279  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
280    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
281      *--d = s;
282      ISA[s] = d - SA;
283    }
284  }
285}
286
287static
288void
289tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
290               saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
291               saidx_t depth) {
292  saidx_t *c, *d, *e;
293  saidx_t s, v;
294  saidx_t rank, lastrank, newrank = -1;
295
296  v = b - SA - 1;
297  lastrank = -1;
298  for(c = first, d = a - 1; c <= d; ++c) {
299    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
300      *++d = s;
301      rank = ISA[s + depth];
302      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
303      ISA[s] = newrank;
304    }
305  }
306
307  lastrank = -1;
308  for(e = d; first <= e; --e) {
309    rank = ISA[*e];
310    if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
311    if(newrank != rank) { ISA[*e] = newrank; }
312  }
313
314  lastrank = -1;
315  for(c = last - 1, e = d + 1, d = b; e < d; --c) {
316    if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
317      *--d = s;
318      rank = ISA[s + depth];
319      if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
320      ISA[s] = newrank;
321    }
322  }
323}
324
325static
326void
327tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
328             saidx_t *SA, saidx_t *first, saidx_t *last,
329             trbudget_t *budget) {
330#define STACK_SIZE TR_STACKSIZE
331  struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
332  saidx_t *a, *b, *c;
333  saidx_t t;
334  saidx_t v, x = 0;
335  saidx_t incr = ISAd - ISA;
336  saint_t limit, next;
337  saint_t ssize, trlink = -1;
338
339  for(ssize = 0, limit = tr_ilg(last - first);;) {
340
341    if(limit < 0) {
342      if(limit == -1) {
343        /* tandem repeat partition */
344        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
345
346        /* update ranks */
347        if(a < last) {
348          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
349        }
350        if(b < last) {
351          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
352        }
353
354        /* push */
355        if(1 < (b - a)) {
356          STACK_PUSH5(NULL, a, b, 0, 0);
357          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
358          trlink = ssize - 2;
359        }
360        if((a - first) <= (last - b)) {
361          if(1 < (a - first)) {
362            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
363            last = a, limit = tr_ilg(a - first);
364          } else if(1 < (last - b)) {
365            first = b, limit = tr_ilg(last - b);
366          } else {
367            STACK_POP5(ISAd, first, last, limit, trlink);
368          }
369        } else {
370          if(1 < (last - b)) {
371            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
372            first = b, limit = tr_ilg(last - b);
373          } else if(1 < (a - first)) {
374            last = a, limit = tr_ilg(a - first);
375          } else {
376            STACK_POP5(ISAd, first, last, limit, trlink);
377          }
378        }
379      } else if(limit == -2) {
380        /* tandem repeat copy */
381        a = stack[--ssize].b, b = stack[ssize].c;
382        if(stack[ssize].d == 0) {
383          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
384        } else {
385          if(0 <= trlink) { stack[trlink].d = -1; }
386          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
387        }
388        STACK_POP5(ISAd, first, last, limit, trlink);
389      } else {
390        /* sorted partition */
391        if(0 <= *first) {
392          a = first;
393          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
394          first = a;
395        }
396        if(first < last) {
397          a = first; do { *a = ~*a; } while(*++a < 0);
398          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
399          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
400
401          /* push */
402          if(trbudget_check(budget, a - first)) {
403            if((a - first) <= (last - a)) {
404              STACK_PUSH5(ISAd, a, last, -3, trlink);
405              ISAd += incr, last = a, limit = next;
406            } else {
407              if(1 < (last - a)) {
408                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
409                first = a, limit = -3;
410              } else {
411                ISAd += incr, last = a, limit = next;
412              }
413            }
414          } else {
415            if(0 <= trlink) { stack[trlink].d = -1; }
416            if(1 < (last - a)) {
417              first = a, limit = -3;
418            } else {
419              STACK_POP5(ISAd, first, last, limit, trlink);
420            }
421          }
422        } else {
423          STACK_POP5(ISAd, first, last, limit, trlink);
424        }
425      }
426      continue;
427    }
428
429    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
430      tr_insertionsort(ISAd, first, last);
431      limit = -3;
432      continue;
433    }
434
435    if(limit-- == 0) {
436      tr_heapsort(ISAd, first, last - first);
437      for(a = last - 1; first < a; a = b) {
438        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
439      }
440      limit = -3;
441      continue;
442    }
443
444    /* choose pivot */
445    a = tr_pivot(ISAd, first, last);
446    SWAP(*first, *a);
447    v = ISAd[*first];
448
449    /* partition */
450    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
451    if((last - first) != (b - a)) {
452      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
453
454      /* update ranks */
455      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
456      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
457
458      /* push */
459      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
460        if((a - first) <= (last - b)) {
461          if((last - b) <= (b - a)) {
462            if(1 < (a - first)) {
463              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
464              STACK_PUSH5(ISAd, b, last, limit, trlink);
465              last = a;
466            } else if(1 < (last - b)) {
467              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
468              first = b;
469            } else {
470              ISAd += incr, first = a, last = b, limit = next;
471            }
472          } else if((a - first) <= (b - a)) {
473            if(1 < (a - first)) {
474              STACK_PUSH5(ISAd, b, last, limit, trlink);
475              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
476              last = a;
477            } else {
478              STACK_PUSH5(ISAd, b, last, limit, trlink);
479              ISAd += incr, first = a, last = b, limit = next;
480            }
481          } else {
482            STACK_PUSH5(ISAd, b, last, limit, trlink);
483            STACK_PUSH5(ISAd, first, a, limit, trlink);
484            ISAd += incr, first = a, last = b, limit = next;
485          }
486        } else {
487          if((a - first) <= (b - a)) {
488            if(1 < (last - b)) {
489              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
490              STACK_PUSH5(ISAd, first, a, limit, trlink);
491              first = b;
492            } else if(1 < (a - first)) {
493              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
494              last = a;
495            } else {
496              ISAd += incr, first = a, last = b, limit = next;
497            }
498          } else if((last - b) <= (b - a)) {
499            if(1 < (last - b)) {
500              STACK_PUSH5(ISAd, first, a, limit, trlink);
501              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
502              first = b;
503            } else {
504              STACK_PUSH5(ISAd, first, a, limit, trlink);
505              ISAd += incr, first = a, last = b, limit = next;
506            }
507          } else {
508            STACK_PUSH5(ISAd, first, a, limit, trlink);
509            STACK_PUSH5(ISAd, b, last, limit, trlink);
510            ISAd += incr, first = a, last = b, limit = next;
511          }
512        }
513      } else {
514        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
515        if((a - first) <= (last - b)) {
516          if(1 < (a - first)) {
517            STACK_PUSH5(ISAd, b, last, limit, trlink);
518            last = a;
519          } else if(1 < (last - b)) {
520            first = b;
521          } else {
522            STACK_POP5(ISAd, first, last, limit, trlink);
523          }
524        } else {
525          if(1 < (last - b)) {
526            STACK_PUSH5(ISAd, first, a, limit, trlink);
527            first = b;
528          } else if(1 < (a - first)) {
529            last = a;
530          } else {
531            STACK_POP5(ISAd, first, last, limit, trlink);
532          }
533        }
534      }
535    } else {
536      if(trbudget_check(budget, last - first)) {
537        limit = tr_ilg(last - first), ISAd += incr;
538      } else {
539        if(0 <= trlink) { stack[trlink].d = -1; }
540        STACK_POP5(ISAd, first, last, limit, trlink);
541      }
542    }
543  }
544#undef STACK_SIZE
545}
546
547
548
549/*---------------------------------------------------------------------------*/
550
551/*- Function -*/
552
553/* Tandem repeat sort */
554void
555trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
556  saidx_t *ISAd;
557  saidx_t *first, *last;
558  trbudget_t budget;
559  saidx_t t, skip, unsorted;
560
561  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
562/*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
563  for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
564    first = SA;
565    skip = 0;
566    unsorted = 0;
567    do {
568      if((t = *first) < 0) { first -= t; skip += t; }
569      else {
570        if(skip != 0) { *(first + skip) = skip; skip = 0; }
571        last = SA + ISA[t] + 1;
572        if(1 < (last - first)) {
573          budget.count = 0;
574          tr_introsort(ISA, ISAd, SA, first, last, &budget);
575          if(budget.count != 0) { unsorted += budget.count; }
576          else { skip = first - last; }
577        } else if((last - first) == 1) {
578          skip = -1;
579        }
580        first = last;
581      }
582    } while(first < (SA + n));
583    if(skip != 0) { *(first + skip) = skip; }
584    if(unsorted == 0) { break; }
585  }
586}
587