sh.hist.c revision 316958
1/* $Header: /p/tcsh/cvsroot/tcsh/sh.hist.c,v 3.61 2015/06/06 21:19:08 christos Exp $ */
2/*
3 * sh.hist.c: Shell history expansions and substitutions
4 */
5/*-
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33#include "sh.h"
34
35RCSID("$tcsh: sh.hist.c,v 3.61 2015/06/06 21:19:08 christos Exp $")
36
37#include <stdio.h>	/* for rename(2), grr. */
38#include <assert.h>
39#include "tc.h"
40#include "dotlock.h"
41
42extern int histvalid;
43extern struct Strbuf histline;
44Char HistLit = 0;
45
46static	int	heq	(const struct wordent *, const struct wordent *);
47static	void	hfree	(struct Hist *);
48
49#define HIST_ONLY	0x01
50#define HIST_SAVE	0x02
51#define HIST_LOAD	0x04
52#define HIST_REV	0x08
53#define HIST_CLEAR	0x10
54#define HIST_MERGE	0x20
55#define HIST_TIME	0x40
56
57/*
58 * C shell
59 */
60
61/* Static functions don't show up in gprof summaries.  So eliminate "static"
62 * modifier from some frequently called functions. */
63#ifdef PROF
64#define PG_STATIC
65#else
66#define PG_STATIC static
67#endif
68
69/* #define DEBUG_HIST 1 */
70
71static const int fastMergeErase = 1;
72static unsigned histCount = 0;		/* number elements on history list */
73static int histlen = 0;
74static struct Hist *histTail = NULL;     /* last element on history list */
75static struct Hist *histMerg = NULL;	 /* last element merged by Htime */
76
77static void insertHistHashTable(struct Hist *, unsigned);
78
79/* Insert new element (hp) in history list after specified predecessor (pp). */
80static void
81hinsert(struct Hist *hp, struct Hist *pp)
82{
83    struct Hist *fp = pp->Hnext;        /* following element, if any */
84    hp->Hnext = fp, hp->Hprev = pp;
85    pp->Hnext = hp;
86    if (fp)
87        fp->Hprev = hp;
88    else
89        histTail = hp;                  /* meaning hp->Hnext == NULL */
90    histCount++;
91}
92
93/* Remove the entry from the history list. */
94static void
95hremove(struct Hist *hp)
96{
97    struct Hist *pp = hp->Hprev;
98    assert(pp);                         /* elements always have a previous */
99    pp->Hnext = hp->Hnext;
100    if (hp->Hnext)
101        hp->Hnext->Hprev = pp;
102    else
103        histTail = pp;                  /* we must have been last */
104    if (hp == histMerg)			/* deleting this hint from list */
105	histMerg = NULL;
106    assert(histCount > 0);
107    histCount--;
108}
109
110/* Prune length of history list to specified size by history variable. */
111PG_STATIC void
112discardExcess(int hlen)
113{
114    struct Hist *hp, *np;
115    if (histTail == NULL) {
116        assert(histCount == 0);
117        return;                         /* no entries on history list */
118    }
119    /* Prune dummy entries from the front, then old entries from the back. If
120     * the list is still too long scan the whole list as before.  But only do a
121     * full scan if the list is more than 6% (1/16th) too long. */
122    while (histCount > (unsigned)hlen && (np = Histlist.Hnext)) {
123        if (eventno - np->Href >= hlen || hlen == 0)
124            hremove(np), hfree(np);
125        else
126            break;
127    }
128    while (histCount > (unsigned)hlen && (np = histTail) != &Histlist) {
129        if (eventno - np->Href >= hlen || hlen == 0)
130            hremove(np), hfree(np);
131        else
132            break;
133    }
134    if (histCount - (hlen >> 4) <= (unsigned)hlen)
135	return;				/* don't bother doing the full scan */
136    for (hp = &Histlist; histCount > (unsigned)hlen &&
137	(np = hp->Hnext) != NULL;)
138        if (eventno - np->Href >= hlen || hlen == 0)
139            hremove(np), hfree(np);
140        else
141            hp = np;
142}
143
144/* Add the command "sp" to the history list. */
145void
146savehist(
147  struct wordent *sp,
148  int mflg)				/* true if -m (merge) specified */
149{
150    /* throw away null lines */
151    if (sp && sp->next->word[0] == '\n')
152	return;
153    if (sp)
154        (void) enthist(++eventno, sp, 1, mflg, histlen);
155    discardExcess(histlen);
156}
157
158#define USE_JENKINS_HASH 1
159/* #define USE_ONE_AT_A_TIME 1 */
160#undef PRIME_LENGTH			/* no need for good HTL */
161
162#ifdef USE_JENKINS_HASH
163#define hashFcnName "lookup3"
164/* From:
165   lookup3.c, by Bob Jenkins, May 2006, Public Domain.
166   "...  You can use this free for any purpose.  It's in
167    the public domain.  It has no warranty."
168   http://burtleburtle.net/bob/hash/index.html
169 */
170
171#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
172#define mix(a,b,c) \
173{ \
174  a -= c;  a ^= rot(c, 4);  c += b; \
175  b -= a;  b ^= rot(a, 6);  a += c; \
176  c -= b;  c ^= rot(b, 8);  b += a; \
177  a -= c;  a ^= rot(c,16);  c += b; \
178  b -= a;  b ^= rot(a,19);  a += c; \
179  c -= b;  c ^= rot(b, 4);  b += a; \
180}
181#define final(a,b,c) \
182{ \
183  c ^= b; c -= rot(b,14); \
184  a ^= c; a -= rot(c,11); \
185  b ^= a; b -= rot(a,25); \
186  c ^= b; c -= rot(b,16); \
187  a ^= c; a -= rot(c, 4); \
188  b ^= a; b -= rot(a,14); \
189  c ^= b; c -= rot(b,24); \
190}
191
192struct hashValue		  /* State used to hash a wordend word list. */
193{
194    uint32_t a, b, c;
195};
196
197/* Set up the internal state */
198static void
199initializeHash(struct hashValue *h)
200{
201    h->a = h->b = h->c = 0xdeadbeef;
202}
203
204/* This does a partial hash of the Chars in a single word.  For efficiency we
205 * include 3 versions of the code to pack Chars into 32-bit words for the
206 * mixing function. */
207static void
208addWordToHash(struct hashValue *h, const Char *word)
209{
210    uint32_t a = h->a, b = h->b, c = h->c;
211#ifdef SHORT_STRINGS
212#ifdef WIDE_STRINGS
213    assert(sizeof(Char) >= 4);
214    while (1) {
215	unsigned k;
216	if ((k = (uChar)*word++) == 0) break; a += k;
217	if ((k = (uChar)*word++) == 0) break; b += k;
218	if ((k = (uChar)*word++) == 0) break; c += k;
219	mix(a, b, c);
220    }
221#else
222    assert(sizeof(Char) == 2);
223    while (1) {
224	unsigned k;
225	if ((k = (uChar)*word++) == 0) break; a += k;
226	if ((k = (uChar)*word++) == 0) break; a += k << 16;
227	if ((k = (uChar)*word++) == 0) break; b += k;
228	if ((k = (uChar)*word++) == 0) break; b += k << 16;
229	if ((k = (uChar)*word++) == 0) break; c += k;
230	if ((k = (uChar)*word++) == 0) break; c += k << 16;
231	mix(a, b, c);
232    }
233#endif
234#else
235    assert(sizeof(Char) == 1);
236    while (1) {
237	unsigned k;
238	if ((k = *word++) == 0) break; a += k;
239	if ((k = *word++) == 0) break; a += k << 8;
240	if ((k = *word++) == 0) break; a += k << 16;
241	if ((k = *word++) == 0) break; a += k << 24;
242	if ((k = *word++) == 0) break; b += k;
243	if ((k = *word++) == 0) break; b += k << 8;
244	if ((k = *word++) == 0) break; b += k << 16;
245	if ((k = *word++) == 0) break; b += k << 24;
246	if ((k = *word++) == 0) break; c += k;
247	if ((k = *word++) == 0) break; c += k << 8;
248	if ((k = *word++) == 0) break; c += k << 16;
249	if ((k = *word++) == 0) break; c += k << 24;
250	mix(a, b, c);
251    }
252#endif
253    h->a = a, h->b = b, h->c = c;
254}
255
256static void
257addCharToHash(struct hashValue *h, Char ch)
258{
259    /* The compiler (gcc -O2) seems to do a good job optimizing this without
260     * explicitly extracting into local variables. */
261    h->a += (uChar)ch;
262    mix(h->a, h->b, h->c);
263}
264
265static uint32_t
266finalizeHash(struct hashValue *h)
267{
268    uint32_t a = h->a, b = h->b, c = h->c;
269    final(a, b, c);
270    return c;
271}
272
273#elif USE_ONE_AT_A_TIME
274#define hashFcnName "one-at-a-time"
275/* This one is also from Bob Jenkins, but is slower but simpler than lookup3.
276   "...  The code given here are all public domain."
277   http://burtleburtle.net/bob/hash/doobs.html */
278
279#if 0
280ub4
281one_at_a_time(char *key, ub4 len)
282{
283  ub4   hash, i;
284  for (hash=0, i=0; i<len; ++i)
285  {
286    hash += key[i];
287    hash += (hash << 10);
288    hash ^= (hash >> 6);
289  }
290  hash += (hash << 3);
291  hash ^= (hash >> 11);
292  hash += (hash << 15);
293  return (hash & mask);
294}
295#endif
296
297struct hashValue { uint32_t h; };
298static void
299initializeHash(struct hashValue *h)
300{
301    h->h = 0;
302}
303
304static void
305addWordToHash(struct hashValue *h, const Char *word)
306{
307    unsigned k;
308    uint32_t hash = h->h;
309    while (k = (uChar)*word++)
310	hash += k, hash += hash << 10, hash ^= hash >> 6;
311    h->h = hash;
312}
313
314static void
315addCharToHash(struct hashValue *h, Char c)
316{
317    Char b[2] = { c, 0 };
318    addWordToHash(h, b);
319}
320
321static uint32_t
322finalizeHash(struct hashValue *h)
323{
324    unsigned hash = h->h;
325    hash += (hash << 3);
326    hash ^= (hash >> 11);
327    hash += (hash << 15);
328    return hash;
329}
330
331#else
332#define hashFcnName "add-mul"
333/* Simple multipy and add hash. */
334#define PRIME_LENGTH 1			/* need "good" HTL */
335struct hashValue { uint32_t h; };
336static void
337initializeHash(struct hashValue *h)
338{
339    h->h = 0xe13e2345;
340}
341
342static void
343addWordToHash(struct hashValue *h, const Char *word)
344{
345    unsigned k;
346    uint32_t hash = h->h;
347    while (k = (uChar)*word++)
348	hash = hash * 0x9e4167b9 + k;
349    h->h = hash;
350}
351
352static void
353addCharToHash(struct hashValue *h, Char c)
354{
355    h->h = h->h * 0x9e4167b9 + (uChar)c;
356}
357
358static uint32_t
359finalizeHash(struct hashValue *h)
360{
361    return h->h;
362}
363#endif
364
365static unsigned
366hashhist(struct wordent *h0)
367{
368    struct hashValue s;
369    struct wordent *firstWord = h0->next;
370    struct wordent *h = firstWord;
371    unsigned hash = 0;
372
373    initializeHash(&s);
374    for (; h != h0; h = h->next) {
375        if (h->word[0] == '\n')
376            break;                      /* don't hash newline */
377        if (h != firstWord)
378            addCharToHash(&s, ' ');	/* space between words */
379	addWordToHash(&s, h->word);
380    }
381    hash = finalizeHash(&s);
382    /* Zero means no hash value, so never return zero as a hash value. */
383    return hash ? hash : 0x7fffffff;	/* prime! */
384}
385
386#if 0
387unsigned
388hashStr(Char *str)
389{
390    struct hashValue s;
391    initializeHash(&s);
392    addWordToHash(&s, str);
393    return finalizeHash(&s);
394}
395#endif
396
397#ifdef PRIME_LENGTH			/* need good HTL */
398#define hash2tableIndex(hash, len) ((hash) % len)
399#else
400#define hash2tableIndex(hash, len) ((hash) & (len-1))
401#endif
402
403/* This code can be enabled to test the above hash functions for speed and
404 * collision avoidance.  The testing is enabled by "occasional" calls to
405 * displayHistStats(), see which. */
406#ifdef DEBUG_HIST
407
408#ifdef BSDTIMES
409static double
410doTiming(int start) {
411    static struct timeval beginTime;
412    if (start) {
413	gettimeofday(&beginTime, NULL);
414	return 0.0;
415    } else {
416	struct timeval now;
417	gettimeofday(&now, NULL);
418	return (now.tv_sec-beginTime.tv_sec) +
419	    (now.tv_usec-beginTime.tv_usec)/1e6;
420    }
421}
422#else
423static double
424doTiming(int start) {
425    USE(start);
426    return 0.0;
427}
428#endif
429
430static void
431generateHashes(int nChars, unsigned nWords, unsigned samples, unsigned *hashes,
432    unsigned length)
433{
434    if (nChars < 1)
435	return;
436    nWords = (nWords < 1) ? 1 : (nWords > 4) ? 4 : nWords;
437    Char *number = xmalloc((nChars+nWords)*sizeof(Char));
438    struct wordent word[4];
439    struct wordent base = { NULL, &word[0], &word[0] };
440    word[0].word = number, word[0].next = &base, word[0].prev = &base;
441    unsigned w = 0;			/* word number */
442    /* Generate multiple words of length 2, 3, 5, then all the rest. */
443    unsigned wBoundaries[4] = { 2-1, 2+3-1, 2+3+5-1, 0 };
444    /* Ensure the last word has at least 4 Chars in it. */
445    while (nWords >= 2 && nChars < (wBoundaries[nWords-2]+1) + 4)
446	nWords--;
447    wBoundaries[nWords-1] = 0xffffffff;	/* don't end word past this point */
448    unsigned i;
449    for (i = 0; i<nChars; i++) {
450	/* In deference to the gawd awful add-mul hash, we won't use the worse
451	 * case here (setting all Chars to 1), but assume mostly (or at least
452	 * initially) ASCII data. */
453	number[i+w] = '!';		/* 0x21 = 33 */
454
455	if (i == wBoundaries[w]) {	/* end a word here and move to next */
456	    w++;			/* next word */
457	    number[i+w] = 0;		/* terminate */
458	    word[w].word = &number[i+w+1];
459	    word[w].next = &base, word[w].prev = &word[w-1];
460	    word[w-1].next = &word[w], base.prev = &word[w];
461	}
462    }
463    /* w is the index of the last word actually created. */
464    number[nChars + w] = 0;		/* terminate last word */
465    unsigned timeLimit = !samples;
466    if (samples == 0)
467	samples = 1000000000;
468    doTiming(1);
469    double sec;
470    for (i = 0; i < samples; i++) {
471	/* increment 4 digit base 255 number; last characters vary fastest */
472	unsigned j = nChars-1 + w;
473	while (1) {
474	    if (++number[j] != 0)
475		break;
476	    /* else reset this digit and proceed to next one */
477	    number[j] = 1;
478	    if (&number[j] <= word[w].word)
479		break;			/* stop at beginning of last word */
480	    j--;
481	}
482	if (word[w].word[0] == '\n')
483	    word[w].word[0]++;		/* suppress newline character */
484	unsigned hash = hashhist(&base);
485	hashes[hash2tableIndex(hash, length)]++;
486	if (timeLimit && (i & 0x3ffff) == 0x3ffff) {
487	    sec = doTiming(0);
488	    if (sec > 10)
489		break;
490	}
491    }
492    if (i >= samples)
493	sec = doTiming(0);
494    else
495	samples = i;			/* number we actually did */
496    if (sec > 0.01) {
497	xprintf("Hash %d (%d Char %u words) with %s: %d nsec/hash, %d mcps\n",
498		samples, nChars, w+1, hashFcnName, (int)((sec/samples)*1e9),
499		(int)((double)samples*nChars/sec/1e6));
500    }
501}
502#endif /* DEBUG_HIST */
503
504#ifdef DEBUG_HIST
505static void
506testHash(void)
507{
508    static const Char STRtestHashTimings[] =
509	{ 't','e','s','t','H','a','s','h','T','i','m','i','n','g','s', 0 };
510    struct varent *vp = adrof(STRtestHashTimings);
511    if (vp && vp->vec) {
512	unsigned hashes[4];		/* dummy place to put hashes */
513	Char **vals = vp->vec;
514	while (*vals) {
515	    int length = getn(*vals);
516	    unsigned words =
517		(length < 5) ? 1 : (length < 25) ? 2 : (length < 75) ? 3 : 4;
518	    if (length > 0)
519		generateHashes(length, words, 0, hashes, 4);
520	    vals++;
521	}
522    }
523    unsigned length = 1024;
524#ifdef PRIME_LENGTH			/* need good HTL */
525    length = 1021;
526#endif
527    unsigned *hashes = xmalloc(length*sizeof(unsigned));
528    memset(hashes, 0, length*sizeof(unsigned));
529    /* Compute collision statistics for half full hashes modulo "length". */
530    generateHashes(4, 1, length/2, hashes, length);
531    /* Evaluate collisions by comparing occupancy rates (mean value 0.5).
532     * One bin for each number of hits. */
533    unsigned bins[155];
534    memset(bins, 0, sizeof(bins));
535    unsigned highest = 0;
536    unsigned i;
537    for (i = 0; i<length; i++) {
538	unsigned hits = hashes[i];
539	if (hits >= sizeof(bins)/sizeof(bins[0])) /* clip */
540	    hits = highest = sizeof(bins)/sizeof(bins[0]) - 1;
541	if (hits > highest)
542	    highest = hits;
543	bins[hits]++;
544    }
545    xprintf("Occupancy of %d buckets by %d hashes %d Chars %d word with %s\n",
546	    length, length/2, 4, 1, hashFcnName);
547    for (i = 0; i <= highest; i++) {
548	xprintf(" %d buckets (%d%%) with %d hits\n",
549		bins[i], bins[i]*100/length, i);
550    }
551    /* Count run lengths to evaluate linear rehashing effectiveness.  Estimate
552     * a little corrupted by edge effects. */
553    memset(bins, 0, sizeof(bins));
554    highest = 0;
555    for (i = 0; hashes[i] == 0; i++);	/* find first occupied bucket */
556    unsigned run = 0;
557    unsigned rehashed = 0;
558    for (; i<length; i++) {
559	unsigned hits = hashes[i];
560	if (hits == 0 && rehashed > 0)
561	    hits = 1 && rehashed--;
562	else if (hits > 1)
563	    rehashed += hits-1;
564	if (hits)
565	    run++;
566	else {
567	    /* a real free slot, count it */
568	    if (run >= sizeof(bins)/sizeof(bins[0])) /* clip */
569		run = highest = sizeof(bins)/sizeof(bins[0]) - 1;
570	    if (run > highest)
571		highest = run;
572	    bins[run]++;
573	    run = 0;
574	}
575    }
576    /* Ignore the partial run at end as we ignored the beginning. */
577    double merit = 0.0, entries = 0;
578    for (i = 0; i <= highest; i++) {
579	entries += bins[i]*i;		/* total hashed objects */
580	merit += bins[i]*i*i;
581    }
582    xprintf("Rehash collision figure of merit %u (ideal=100), run lengths:\n",
583	    (int)(100.0*merit/entries));
584    for (i = 0; i <= highest; i++) {
585	if (bins[i] != 0)
586	    xprintf(" %d runs of length %d buckets\n", bins[i], i);
587    }
588    xfree(hashes);
589}
590#endif /* DEBUG_HIST */
591
592/* Compares two word lists for equality. */
593static int
594heq(const struct wordent *a0, const struct wordent *b0)
595{
596    const struct wordent *a = a0->next, *b = b0->next;
597
598    for (;;) {
599	if (Strcmp(a->word, b->word) != 0)
600	    return 0;
601	a = a->next;
602	b = b->next;
603	if (a == a0)
604	    return (b == b0) ? 1 : 0;
605	if (b == b0)
606	    return 0;
607    }
608}
609
610/* Renumber entries following p, which we will be deleting. */
611PG_STATIC void
612renumberHist(struct Hist *p)
613{
614    int n = p->Href;
615    while ((p = p->Hnext))
616        p->Href = n--;
617}
618
619/* The hash table is implemented as an array of pointers to Hist entries.  Each
620 * entry is located in the table using hash2tableIndex() and checking the
621 * following entries in case of a collision (linear rehash).  Free entries in
622 * the table are zero (0, NULL, emptyHTE).  Deleted entries that cannot yet be
623 * freed are set to one (deletedHTE).  The Hist.Hhash member is non-zero iff
624 * the entry is in the hash table.  When the hash table get too full, it is
625 * reallocated to be approximately twice the history length (see
626 * getHashTableSize). */
627static struct Hist **histHashTable = NULL;
628static unsigned histHashTableLength = 0; /* number of Hist pointers in table */
629
630static struct Hist * const emptyHTE = NULL;
631static struct Hist * const deletedHTE = (struct Hist *)1;
632
633static struct {
634    unsigned insertCount;
635    unsigned removeCount;
636    unsigned rehashes;
637    int deleted;
638} hashStats;
639
640#ifdef DEBUG_HIST
641void
642checkHistHashTable(int print)
643{
644    unsigned occupied = 0;
645    unsigned deleted = 0;
646    unsigned i;
647    for (i = 0; i<histHashTableLength; i++)
648	if (histHashTable[i] == emptyHTE)
649	    continue;
650	else if (histHashTable[i] == deletedHTE)
651	    deleted++;
652	else
653	    occupied++;
654    if (print)
655	xprintf("  found len %u occupied %u deleted %u\n",
656		histHashTableLength, occupied, deleted);
657    assert(deleted == hashStats.deleted);
658}
659
660static int doneTest = 0;
661
662/* Main entry point for displaying history statistics and hash function
663 * behavior. */
664void
665displayHistStats(const char *reason)
666{
667    /* Just hash statistics for now. */
668    xprintf("%s history hash table len %u count %u (deleted %d)\n", reason,
669	    histHashTableLength, histCount, hashStats.deleted);
670    xprintf("  inserts %u rehashes %u%% each\n",
671	    hashStats.insertCount,
672	    (hashStats.insertCount
673	     ? 100*hashStats.rehashes/hashStats.insertCount : 0));
674    xprintf("  removes %u net %u\n",
675	    hashStats.removeCount,
676	    hashStats.insertCount - hashStats.removeCount);
677    assert(hashStats.insertCount >= hashStats.removeCount);
678    checkHistHashTable(1);
679    memset(&hashStats, 0, sizeof(hashStats));
680    if (!doneTest) {
681	testHash();
682	doneTest = 1;
683    }
684}
685#else
686void
687displayHistStats(const char *reason)
688{
689    USE(reason);
690}
691#endif
692
693static void
694discardHistHashTable(void)
695{
696    if (histHashTable == NULL)
697        return;
698    displayHistStats("Discarding");
699    xfree(histHashTable);
700    histHashTable = NULL;
701}
702
703/* Computes a new hash table size, when the current one is too small. */
704static unsigned
705getHashTableSize(int hlen)
706{
707    unsigned target = hlen * 2;
708    unsigned e = 5;
709    unsigned size;
710    while ((size = 1<<e) < target)
711	e++;
712#ifdef PRIME_LENGTH			/* need good HTL */
713    /* Not all prime, but most are and none have factors smaller than 11. */
714    return size+15;
715#else
716    assert((size & (size-1)) == 0);	/* must be a power of two */
717    return size;
718#endif
719}
720
721/* Create the hash table or resize, if necessary. */
722static void
723createHistHashTable(int hlen)
724{
725    if (hlen == 0) {
726	discardHistHashTable();
727        return;
728    }
729    if (hlen < 0) {
730	if (histlen <= 0)
731	    return;			/* no need for hash table */
732	hlen = histlen;
733    }
734    if (histHashTable != NULL) {
735	if (histCount < histHashTableLength * 3 / 4)
736	    return;			/* good enough for now */
737	discardHistHashTable();		/* too small */
738    }
739    histHashTableLength = getHashTableSize(
740	hlen > (int)histCount ? hlen : (int)histCount);
741    histHashTable = xmalloc(histHashTableLength * sizeof(struct Hist *));
742    memset(histHashTable, 0, histHashTableLength * sizeof(struct Hist *));
743    assert(histHashTable[0] == emptyHTE);
744
745    /* Now insert all the entries on the history list into the hash table. */
746    {
747        struct Hist *hp;
748        for (hp = &Histlist; (hp = hp->Hnext) != NULL;) {
749            unsigned lpHash = hashhist(&hp->Hlex);
750            assert(!hp->Hhash || hp->Hhash == lpHash);
751            hp->Hhash = 0;              /* force insert to new hash table */
752            insertHistHashTable(hp, lpHash);
753        }
754    }
755}
756
757/* Insert np into the hash table.  We assume that np is already on the
758 * Histlist.  The specified hashval matches the new Hist entry but has not yet
759 * been assigned to Hhash (or the element is already on the hash table). */
760static void
761insertHistHashTable(struct Hist *np, unsigned hashval)
762{
763    unsigned rehashes = 0;
764    unsigned hi = 0;
765    if (!histHashTable)
766	return;
767    if (np->Hhash != 0) {
768        /* already in hash table */
769        assert(hashval == np->Hhash);
770        return;
771    }
772    assert(np != deletedHTE);
773    /* Find a free (empty or deleted) slot, using linear rehash. */
774    assert(histHashTable);
775    for (rehashes = 0;
776         ((hi = hash2tableIndex(hashval + rehashes, histHashTableLength)),
777          histHashTable[hi] != emptyHTE && histHashTable[hi] != deletedHTE);
778         rehashes++) {
779        assert(np != histHashTable[hi]);
780        if (rehashes >= histHashTableLength / 10) {
781            /* Hash table is full, so grow it.  We assume the create function
782             * will roughly double the size we give it.  Create initializes the
783             * new table with everything on the Histlist, so we are done when
784             * it returns.  */
785#ifdef DEBUG_HIST
786	    xprintf("Growing history hash table from %d ...",
787		histHashTableLength);
788	    flush();
789#endif
790            discardHistHashTable();
791            createHistHashTable(histHashTableLength);
792#ifdef DEBUG_HIST
793	    xprintf("to %d.\n", histHashTableLength);
794#endif
795            return;
796        }
797    }
798    /* Might be sensible to grow hash table if rehashes is "too big" here. */
799    if (histHashTable[hi] == deletedHTE)
800	hashStats.deleted--;
801    histHashTable[hi] = np;
802    np->Hhash = hashval;
803    hashStats.insertCount++;
804    hashStats.rehashes += rehashes;
805}
806
807/* Remove the 'np' entry from the hash table. */
808static void
809removeHistHashTable(struct Hist *np)
810{
811    unsigned hi = np->Hhash;
812    if (!histHashTable || !hi)
813        return;                         /* no hash table or not on it */
814    /* find desired entry */
815    while ((hi = hash2tableIndex(hi, histHashTableLength)),
816           histHashTable[hi] != emptyHTE) {
817        if (np == histHashTable[hi]) {
818	    unsigned i;
819	    unsigned deletes = 0;
820	    histHashTable[hi] = deletedHTE; /* dummy, but non-zero entry */
821	    /* now peek ahead to see if the dummies are really necessary. */
822	    i = 1;
823	    while (histHashTable[hash2tableIndex(hi+i, histHashTableLength)] ==
824		   deletedHTE)
825		i++;
826	    if (histHashTable[hash2tableIndex(hi+i, histHashTableLength)] ==
827		emptyHTE) {
828		/* dummies are no longer necessary placeholders. */
829		deletes = i;
830		while (i-- > 0) {
831		    histHashTable[hash2tableIndex(hi+i, histHashTableLength)] =
832			emptyHTE;
833		}
834	    }
835	    hashStats.deleted += 1 - deletes; /* delta deleted entries */
836	    hashStats.removeCount++;
837            return;
838        }
839        hi++;                           /* linear rehash */
840    }
841    assert(!"Hist entry not found in hash table");
842}
843
844/* Search the history hash table for a command matching lp, using hashval as
845 * its hash value. */
846static struct Hist *
847findHistHashTable(struct wordent *lp, unsigned hashval)
848{
849    unsigned deleted = 0;		/* number of deleted entries skipped */
850    unsigned hi = hashval;
851    struct Hist *hp;
852    if (!histHashTable)
853	return NULL;
854    while ((hi = hash2tableIndex(hi, histHashTableLength)),
855           (hp = histHashTable[hi]) != emptyHTE) {
856        if (hp == deletedHTE)
857	    deleted++;
858	else if (hp->Hhash == hashval && heq(lp, &(hp->Hlex)))
859            return hp;
860	if (deleted > (histHashTableLength>>4)) {
861	    /* lots of deletes, so we need a sparser table. */
862            discardHistHashTable();
863            createHistHashTable(histHashTableLength);
864	    return findHistHashTable(lp, hashval);
865	}
866        hi++;                           /* linear rehash */
867    }
868    return NULL;
869}
870
871/* When merge semantics are in use, find the approximate predecessor for the
872 * new entry, so that the Htime entries are decreasing.  Return the entry just
873 * before the first entry with equal times, so the caller can check for
874 * duplicates.  When pTime is not NULL, use it as a starting point for search,
875 * otherwise search from beginning (largest time value) of history list. */
876PG_STATIC struct Hist *
877mergeInsertionPoint(
878    struct Hist *np,                      /* new entry to be inserted */
879    struct Hist *pTime)                   /* hint about where to insert */
880{
881    struct Hist *pp, *p;
882    if (histTail && histTail->Htime >= np->Htime)
883	pTime = histTail;		/* new entry goes at the end */
884    if (histMerg && histMerg != &Histlist && histMerg != Histlist.Hnext) {
885	/* Check above and below previous insertion point, in case we're adding
886	 * sequential times in the middle of the list (e.g. history -M). */
887	if (histMerg->Htime >= np->Htime)
888	    pTime = histMerg;
889	else if (histMerg->Hprev->Htime >= np->Htime)
890	    pTime = histMerg->Hprev;
891    }
892    if (pTime) {
893        /* With hint, search up the list until Htime is greater.  We skip past
894         * the equal ones, too, so our caller can elide duplicates. */
895        pp = pTime;
896        while (pp != &Histlist && pp->Htime <= np->Htime)
897            pp = pp->Hprev;
898    } else
899        pp = &Histlist;
900    /* Search down the list while current entry's time is too large. */
901    while ((p = pp->Hnext) && (p->Htime > np->Htime))
902            pp = p;                     /* advance insertion point */
903    /* Remember recent position as hint for next time */
904    histMerg = pp;
905    return pp;
906}
907
908/* Bubble Hnum & Href in new entry down to pp through earlier part of list. */
909PG_STATIC void bubbleHnumHrefDown(struct Hist *np, struct Hist *pp)
910{
911    struct Hist *p;
912    for (p = Histlist.Hnext; p != pp->Hnext; p = p->Hnext) {
913        /* swap Hnum & Href values of p and np. */
914        int n = p->Hnum, r = p->Href;
915        p->Hnum = np->Hnum; p->Href = np->Href;
916        np->Hnum = n; np->Href = r;
917    }
918}
919
920/* Enter new command into the history list according to current settings. */
921struct Hist *
922enthist(
923  int event,				/* newly incremented global eventno */
924  struct wordent *lp,
925  int docopy,
926  int mflg,				/* true if merge requested */
927  int hlen)				/* -1 if unknown */
928{
929    struct Hist *p = NULL, *pp = &Histlist, *pTime = NULL;
930    struct Hist *np;
931    const Char *dp;
932    unsigned lpHash = 0;                /* non-zero if hashing entries */
933
934    if ((dp = varval(STRhistdup)) != STRNULL) {
935	if (eq(dp, STRerase)) {
936	    /* masaoki@akebono.tky.hp.com (Kobayashi Masaoki) */
937            createHistHashTable(hlen);
938            lpHash = hashhist(lp);
939            assert(lpHash != 0);
940            p = findHistHashTable(lp, lpHash);
941            if (p) {
942		if (Htime != 0 && p->Htime > Htime)
943		    Htime = p->Htime;
944                /* If we are merging, and the old entry is at the place we want
945                 * to insert the new entry, then remember the place. */
946                if (mflg && Htime != 0 && p->Hprev->Htime >= Htime)
947                    pTime = p->Hprev;
948		if (!fastMergeErase)
949		    renumberHist(p);	/* Reset Href of subsequent entries */
950                hremove(p);
951		hfree(p);
952                p = NULL;               /* so new entry is allocated below */
953	    }
954	}
955	else if (eq(dp, STRall)) {
956            createHistHashTable(hlen);
957            lpHash = hashhist(lp);
958            assert(lpHash != 0);
959            p = findHistHashTable(lp, lpHash);
960	    if (p)   /* p!=NULL, only update this entry's Htime below */
961		eventno--;		/* not adding a new event */
962	}
963	else if (eq(dp, STRprev)) {
964	    if (pp->Hnext && heq(lp, &(pp->Hnext->Hlex))) {
965		p = pp->Hnext;
966		eventno--;
967	    }
968	}
969    }
970
971    np = p ? p : xmalloc(sizeof(*np));
972
973    /* Pick up timestamp set by lex() in Htime if reading saved history */
974    if (Htime != 0) {
975	np->Htime = Htime;
976	Htime = 0;
977    }
978    else
979        (void) time(&(np->Htime));
980
981    if (p == np)
982        return np;                      /* reused existing entry */
983
984    /* Initialize the new entry. */
985    np->Hnum = np->Href = event;
986    if (docopy) {
987        copylex(&np->Hlex, lp);
988	if (histvalid)
989	    np->histline = Strsave(histline.s);
990	else
991	    np->histline = NULL;
992    }
993    else {
994	np->Hlex.next = lp->next;
995	lp->next->prev = &np->Hlex;
996	np->Hlex.prev = lp->prev;
997        lp->prev->next = &np->Hlex;
998        np->histline = NULL;
999    }
1000    np->Hhash = 0;
1001
1002    /* The head of history list is the default insertion point.
1003       If merging, advance insertion point, in pp, according to Htime. */
1004    /* XXX -- In histdup=all, Htime values can be non-monotonic. */
1005    if (mflg) {                         /* merge according to np->Htime */
1006        pp = mergeInsertionPoint(np, pTime);
1007        for (p = pp->Hnext; p && p->Htime == np->Htime; pp = p, p = p->Hnext) {
1008            if (heq(&p->Hlex, &np->Hlex)) {
1009                eventno--;              /* duplicate, so don't add new event */
1010                hfree(np);
1011                return (p);
1012              }
1013          }
1014        /* pp is now the last entry with time >= to np. */
1015	if (!fastMergeErase) {		/* renumber at end of loadhist */
1016	    /* Before inserting np after pp, bubble its Hnum & Href values down
1017	     * through the earlier part of list. */
1018	    bubbleHnumHrefDown(np, pp);
1019	}
1020    }
1021    else
1022        pp = &Histlist;                 /* insert at beginning of history */
1023    hinsert(np, pp);
1024    if (lpHash && hlen != 0)		/* erase & all modes use hash table */
1025        insertHistHashTable(np, lpHash);
1026    else
1027        discardHistHashTable();
1028    return (np);
1029}
1030
1031static void
1032hfree(struct Hist *hp)
1033{
1034    assert(hp != histMerg);
1035    if (hp->Hhash)
1036        removeHistHashTable(hp);
1037    freelex(&hp->Hlex);
1038    if (hp->histline)
1039        xfree(hp->histline);
1040    xfree(hp);
1041}
1042
1043PG_STATIC void
1044phist(struct Hist *hp, int hflg)
1045{
1046    if (hp->Href < 0)
1047	return;
1048    if (hflg & HIST_ONLY) {
1049	int old_output_raw;
1050
1051       /*
1052        * Control characters have to be written as is (output_raw).
1053        * This way one can preserve special characters (like tab) in
1054        * the history file.
1055        * From: mveksler@vnet.ibm.com (Veksler Michael)
1056        */
1057	old_output_raw = output_raw;
1058        output_raw = 1;
1059	cleanup_push(&old_output_raw, output_raw_restore);
1060	if (hflg & HIST_TIME)
1061	    /*
1062	     * Make file entry with history time in format:
1063	     * "+NNNNNNNNNN" (10 digits, left padded with ascii '0')
1064	     */
1065
1066	    xprintf("#+%010lu\n", (unsigned long)hp->Htime);
1067
1068	if (HistLit && hp->histline)
1069	    xprintf("%S\n", hp->histline);
1070	else
1071	    prlex(&hp->Hlex);
1072        cleanup_until(&old_output_raw);
1073    }
1074    else {
1075	Char   *cp = str2short("%h\t%T\t%R\n");
1076	Char *p;
1077	struct varent *vp = adrof(STRhistory);
1078
1079	if (vp && vp->vec != NULL && vp->vec[0] && vp->vec[1])
1080	    cp = vp->vec[1];
1081
1082	p = tprintf(FMT_HISTORY, cp, NULL, hp->Htime, hp);
1083	cleanup_push(p, xfree);
1084	for (cp = p; *cp;)
1085	    xputwchar(*cp++);
1086	cleanup_until(p);
1087    }
1088}
1089
1090PG_STATIC void
1091dophist(int n, int hflg)
1092{
1093    struct Hist *hp;
1094    if (setintr) {
1095	int old_pintr_disabled;
1096
1097	pintr_push_enable(&old_pintr_disabled);
1098	cleanup_until(&old_pintr_disabled);
1099    }
1100    if ((hflg & HIST_REV) == 0) {
1101	/* Since the history list is stored most recent first, non-reversing
1102	 * print needs to print (backwards) up the list. */
1103	if ((unsigned)n >= histCount)
1104	    hp = histTail;
1105	else {
1106	    for (hp = Histlist.Hnext;
1107		 --n > 0 && hp->Hnext != NULL;
1108		 hp = hp->Hnext)
1109		;
1110	}
1111	if (hp == NULL)
1112	    return;			/* nothing to print */
1113	for (; hp != &Histlist; hp = hp->Hprev)
1114	    phist(hp, hflg);
1115    } else {
1116	for (hp = Histlist.Hnext; n-- > 0 && hp != NULL; hp = hp->Hnext)
1117	    phist(hp, hflg);
1118    }
1119}
1120
1121/*ARGSUSED*/
1122void
1123dohist(Char **vp, struct command *c)
1124{
1125    int     n, hflg = 0;
1126
1127    USE(c);
1128    if (getn(varval(STRhistory)) == 0)
1129	return;
1130    while (*++vp && **vp == '-') {
1131	Char   *vp2 = *vp;
1132
1133	while (*++vp2)
1134	    switch (*vp2) {
1135	    case 'c':
1136		hflg |= HIST_CLEAR;
1137		break;
1138	    case 'h':
1139		hflg |= HIST_ONLY;
1140		break;
1141	    case 'r':
1142		hflg |= HIST_REV;
1143		break;
1144	    case 'S':
1145		hflg |= HIST_SAVE;
1146		break;
1147	    case 'L':
1148		hflg |= HIST_LOAD;
1149		break;
1150	    case 'M':
1151	    	hflg |= HIST_MERGE;
1152		break;
1153	    case 'T':
1154	    	hflg |= HIST_TIME;
1155		break;
1156	    default:
1157		stderror(ERR_HISTUS, "chrSLMT");
1158		break;
1159	    }
1160    }
1161    if (hflg & HIST_CLEAR) {
1162        struct Hist *np, *hp;
1163        for (hp = &Histlist; (np = hp->Hnext) != NULL;)
1164            hremove(np), hfree(np);
1165    }
1166
1167    if (hflg & (HIST_LOAD | HIST_MERGE))
1168	loadhist(*vp, (hflg & HIST_MERGE) ? 1 : 0);
1169    else if (hflg & HIST_SAVE)
1170	rechist(*vp, 1);
1171    else {
1172	if (*vp)
1173	    n = getn(*vp);
1174	else {
1175	    n = getn(varval(STRhistory));
1176	}
1177	dophist(n, hflg);
1178    }
1179}
1180
1181
1182char *
1183fmthist(int fmt, ptr_t ptr)
1184{
1185    struct Hist *hp = ptr;
1186    char *buf;
1187
1188    switch (fmt) {
1189    case 'h':
1190	return xasprintf("%6d", hp->Hnum);
1191    case 'R':
1192	if (HistLit && hp->histline)
1193	    return xasprintf("%S", hp->histline);
1194	else {
1195	    Char *istr, *ip;
1196	    char *p;
1197
1198	    istr = sprlex(&hp->Hlex);
1199	    buf = xmalloc(Strlen(istr) * MB_LEN_MAX + 1);
1200
1201	    for (p = buf, ip = istr; *ip != '\0'; ip++)
1202		p += one_wctomb(p, *ip);
1203
1204	    *p = '\0';
1205	    xfree(istr);
1206	    return buf;
1207	}
1208    default:
1209	buf = xmalloc(1);
1210	buf[0] = '\0';
1211	return buf;
1212    }
1213}
1214
1215static void
1216dotlock_cleanup(void* lockpath)
1217{
1218	dot_unlock((char*)lockpath);
1219}
1220
1221/* Save history before exiting the shell. */
1222void
1223rechist(Char *fname, int ref)
1224{
1225    Char    *snum, *rs;
1226    int     fp, ftmp, oldidfds;
1227    struct varent *shist;
1228    char path[MAXPATHLEN];
1229    struct stat st;
1230    static Char   *dumphist[] = {STRhistory, STRmhT, 0, 0};
1231
1232    if (fname == NULL && !ref)
1233	return;
1234    /*
1235     * If $savehist is just set, we use the value of $history
1236     * else we use the value in $savehist
1237     */
1238    if (((snum = varval(STRsavehist)) == STRNULL) &&
1239	((snum = varval(STRhistory)) == STRNULL))
1240	snum = STRmaxint;
1241
1242
1243    if (fname == NULL) {
1244	if ((fname = varval(STRhistfile)) == STRNULL)
1245	    fname = Strspl(varval(STRhome), &STRtildothist[1]);
1246	else
1247	    fname = Strsave(fname);
1248    }
1249    else
1250	fname = globone(fname, G_ERROR);
1251    cleanup_push(fname, xfree);
1252
1253    /*
1254     * The 'savehist merge' feature is intended for an environment
1255     * with numerous shells being in simultaneous use. Imagine
1256     * any kind of window system. All these shells 'share' the same
1257     * ~/.history file for recording their command line history.
1258     * We try to handle the case of multiple shells trying to merge
1259     * histories at the same time, by creating semi-unique filenames
1260     * and saving the history there first and then trying to rename
1261     * them in the proper history file.
1262     *
1263     * Users that like to nuke their environment require here an atomic
1264     * loadhist-creat-dohist(dumphist)-close sequence which is given
1265		 * by optional lock parameter to savehist.
1266     *
1267     * jw.
1268     */
1269    /*
1270     * We need the didfds stuff before loadhist otherwise
1271     * exec in a script will fail to print if merge is set.
1272     * From: mveksler@iil.intel.com (Veksler Michael)
1273     */
1274    oldidfds = didfds;
1275    didfds = 0;
1276    if ((shist = adrof(STRsavehist)) != NULL && shist->vec != NULL) {
1277	size_t i;
1278	int merge = 0, lock = 0;
1279
1280	for (i = 1; shist->vec[i]; i++) {
1281	    if (eq(shist->vec[i], STRmerge))
1282		merge++;
1283	    if (eq(shist->vec[i], STRlock))
1284		lock++;
1285	}
1286
1287	if (merge) {
1288	    if (lock) {
1289#ifndef WINNT_NATIVE
1290		char *lockpath = strsave(short2str(fname));
1291		cleanup_push(lockpath, xfree);
1292		/* Poll in 100 miliseconds interval to obtain the lock. */
1293		if ((dot_lock(lockpath, 100) == 0))
1294		    cleanup_push(lockpath, dotlock_cleanup);
1295#endif
1296	    }
1297	    loadhist(fname, 1);
1298	}
1299    }
1300    rs = randsuf();
1301    xsnprintf(path, sizeof(path), "%S.%S", fname, rs);
1302    xfree(rs);
1303
1304    fp = xcreat(path, 0600);
1305    if (fp == -1) {
1306	didfds = oldidfds;
1307	cleanup_until(fname);
1308	return;
1309    }
1310    /* Try to preserve ownership and permissions of the original history file */
1311#ifndef WINNT_NATIVE
1312    if (stat(short2str(fname), &st) != -1) {
1313	TCSH_IGNORE(fchown(fp, st.st_uid, st.st_gid));
1314	TCSH_IGNORE(fchmod(fp, st.st_mode));
1315    }
1316#else
1317    UNREFERENCED_PARAMETER(st);
1318#endif
1319    ftmp = SHOUT;
1320    SHOUT = fp;
1321    dumphist[2] = snum;
1322    dohist(dumphist, NULL);
1323    xclose(fp);
1324    SHOUT = ftmp;
1325    didfds = oldidfds;
1326    (void)rename(path, short2str(fname));
1327    cleanup_until(fname);
1328}
1329
1330
1331/* This is the entry point for loading history data from a file. */
1332void
1333loadhist(Char *fname, int mflg)
1334{
1335    static Char   *loadhist_cmd[] = {STRsource, NULL, NULL, NULL};
1336    loadhist_cmd[1] = mflg ? STRmm : STRmh;
1337
1338    if (fname != NULL)
1339	loadhist_cmd[2] = fname;
1340    else if ((fname = varval(STRhistfile)) != STRNULL)
1341	loadhist_cmd[2] = fname;
1342    else
1343	loadhist_cmd[2] = STRtildothist;
1344
1345    dosource(loadhist_cmd, NULL);
1346
1347    /* During history merging (enthist sees mflg set), we disable management of
1348     * Hnum and Href (because fastMergeErase is true).  So now reset all the
1349     * values based on the final ordering of the history list. */
1350    if (mflg) {
1351	int n = eventno;
1352        struct Hist *hp = &Histlist;
1353        while ((hp = hp->Hnext))
1354	    hp->Hnum = hp->Href = n--;
1355    }
1356}
1357
1358void
1359sethistory(int n)
1360{
1361    histlen = n;
1362    discardExcess(histlen);
1363}
1364