1/* Copyright (C) 2012-2013
2   Free Software Foundation
3
4   This file is part of GCC.
5
6   GCC is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3, or (at your option)
9   any later version.
10
11   GCC is distributed in the hope that it will be useful, but WITHOUT
12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14   License for more details.
15
16   Under Section 7 of GPL version 3, you are granted additional
17   permissions described in the GCC Runtime Library Exception, version
18   3.1, as published by the Free Software Foundation.
19
20   You should have received a copy of the GNU General Public License
21   and a copy of the GCC Runtime Library Exception along with this
22   program; see the files COPYING3 and COPYING.RUNTIME respectively.
23   If not, see <http://www.gnu.org/licenses/>.  */
24
25#ifndef _VTV_SET_H
26#define _VTV_SET_H 1
27
28/* Code in this file manages a collection of insert-only sets.  We
29   have only tested the case where Key is uintptr_t, though it
30   theoretically should work for some other cases.  All odd keys are
31   reserved, and must not be inserted into any of the sets.  This code
32   is intended primarily for sets of pointers, and the code is
33   optimized for small sets (including size 0 and 1), but regardless
34   of the set size, insert() and contains() have close to O(1) speed
35   in practice.
36
37   TODO(gpike): fix this comment.
38
39   Recommended multithreaded use of a set:
40
41   For speed, we want to use a lock-free test for set membership.  The
42   code handles simultaneous reads and inserts, as long as at most one
43   insertion is in progress at a time.  After an insert, other threads
44   may not immediately "see" the inserted key if they perform a
45   lock-free read, so we recommend retrying, as explained below.
46
47   Also, to make data corruption less likely, we recommend using a
48   "normal" RW page as well as one or pages that are typically RO
49   but that can be switched to RW and back as needed.  The latter
50   pages should contain sets.  The former should contain a lock, L,
51   and an int or similar, num_writers.  Then, to insert, something
52   like this would be safe:
53    o Acquire L.
54    o Increment num_writers; if that made it 1, change pages to RW.
55    o Release L.
56    o while (there are insertions to do in some set, S) {
57        acquire L;
58        do some insertions in S;
59        release L;
60      }
61    o Acquire L.
62    o Decrement num_writers; if that made it 0, change pages to RO.
63    o Release L.
64
65   And to check if the set contains some key, one could use
66     set.contains(key) ||
67       ({ Acquire L; bool b = set.contains(key); Release L; b; })
68
69   In this scheme, the number of threads with reads in progress isn't
70   tracked, so old sets can never be deleted.  In addition, on some
71   architectures the intentionally racy reads might cause contains()
72   to return true when it should have returned false.  This should be
73   no problem on x86, and most other machines, where reading or
74   writing an aligned uintptr_t is atomic.  E.g., on those machines,
75   if *p is 0 and one thread does *p = x while another reads *p, the
76   read will see either 0 or x.
77
78   To make the above easier, the insert_only_hash_sets class provides
79   an interface to manipulate any number of hash sets.  One shouldn't
80   create objects of that class, as it has no member data and its
81   methods are static.
82
83   So the recommended model is to have a single lock, a single
84   num_writers variable, and some number of sets.  If lock contention
85   becomes a problem then the sets can be divided into k groups, each
86   of which has a lock and a num_writers variable; or each set can be
87   represented as a set of values that equal 0 mod m, a set of values
88   that equal 1 mod m, ..., plus a set of values that equal m-1 mod m.
89
90   However, we expect most or all uses of this code to call contains()
91   much more frequently than anything else, so lock contention is
92   likely to be low.  */
93
94#include <algorithm>
95
96#ifndef HASHTABLE_STATS
97#define HASHTABLE_STATS 0
98#endif
99
100#ifndef HASHTABLE_STATS_ATOMIC
101#define HASHTABLE_STATS_ATOMIC 0
102#endif
103
104#if HASHTABLE_STATS
105#if HASHTABLE_STATS_ATOMIC
106/* Stat counters, with atomics. */
107#include <bits/atomic_word.h>
108
109typedef _Atomic_word _AtomicStatCounter;
110
111void
112inc_by (_AtomicStatCounter &stat, int amount)
113{
114  __atomic_add_fetch (&stat, amount,  __ATOMIC_ACQ_REL);
115}
116
117#else
118
119/* Stat counters, but without atomics. */
120typedef int _AtomicStatCounter;
121
122void
123inc_by (_AtomicStatCounter& stat, int amount)
124{
125  stat += amount;
126}
127
128#endif
129
130
131/* Number of calls to contains(), insert(), etc. */
132extern _AtomicStatCounter stat_insert;
133extern _AtomicStatCounter stat_contains;
134extern _AtomicStatCounter stat_resize;
135extern _AtomicStatCounter stat_create;
136
137/* Sum of set size over all calls to contains().  */
138extern _AtomicStatCounter stat_contains_sizes;
139
140/* contains() calls in a set whose capacity is more than 1. */
141extern _AtomicStatCounter stat_contains_in_non_trivial_set;
142
143/* Probes in a set whose capacity is more than 1.  Ideally, this will
144   be pretty close to stat_contains_in_non_trivial_set.  That will
145   happen if our hash function is good and/or important keys were
146   inserted before unimportant keys.  */
147extern _AtomicStatCounter stat_probes_in_non_trivial_set;
148
149/* number of calls to contains() with size=0, 1, etc. */
150extern _AtomicStatCounter stat_contains_size0;
151extern _AtomicStatCounter stat_contains_size1;
152extern _AtomicStatCounter stat_contains_size2;
153extern _AtomicStatCounter stat_contains_size3;
154extern _AtomicStatCounter stat_contains_size4;
155extern _AtomicStatCounter stat_contains_size5;
156extern _AtomicStatCounter stat_contains_size6;
157extern _AtomicStatCounter stat_contains_size7;
158extern _AtomicStatCounter stat_contains_size8;
159extern _AtomicStatCounter stat_contains_size9;
160extern _AtomicStatCounter stat_contains_size10;
161extern _AtomicStatCounter stat_contains_size11;
162extern _AtomicStatCounter stat_contains_size12;
163extern _AtomicStatCounter stat_contains_size13_or_more;
164extern _AtomicStatCounter stat_grow_from_size0_to_1;
165extern _AtomicStatCounter stat_grow_from_size1_to_2;
166extern _AtomicStatCounter stat_double_the_number_of_buckets;
167extern _AtomicStatCounter stat_insert_key_that_was_already_present;
168
169/* Hash collisions detected during insert_no_resize().  Only counts
170   hasher(k) == hasher(k'); hasher(k) % tablesize == hasher(k') %
171   tablesize is not sufficient.  Will count collisions that are
172   detected during table resizes etc., so the same two keys may add to
173   this stat multiple times.  */
174extern _AtomicStatCounter stat_insert_found_hash_collision;
175
176#include <string>
177
178struct insert_only_hash_sets_logger
179{
180  static char *
181  log (char c, char *buf)
182  {
183    *buf++ = c;
184    return buf;
185  }
186
187  static char *
188  log (const char *s, char *buf)
189  { return strcpy (buf, s) + strlen (s); }
190
191  static char *
192  log (_AtomicStatCounter i, char *buf)
193  {
194    if (i < 10)
195      return log ((char) ('0' + i), buf);
196    else
197      return log ((char) ('0' + i % 10), log (i / 10, buf));
198  }
199
200  static char *
201  log (const char *label, _AtomicStatCounter i, char *buf)
202  {
203    buf = log (label, buf);
204    buf = log (": ", buf);
205    buf = log (i, buf);
206    return log ('\n', buf);
207  }
208};
209
210// Write stats to the given buffer, which should be at least 4000 bytes.
211static inline void
212insert_only_hash_tables_stats (char *buf)
213{
214  buf = insert_only_hash_sets_logger::log ("insert", stat_insert, buf);
215  buf = insert_only_hash_sets_logger::log ("contains", stat_contains, buf);
216  buf = insert_only_hash_sets_logger::log ("resize", stat_resize, buf);
217  buf = insert_only_hash_sets_logger::log ("create", stat_create, buf);
218  buf = insert_only_hash_sets_logger::log ("insert_key_that_was_already_"
219				      "present",
220				      stat_insert_key_that_was_already_present,
221				      buf);
222  buf = insert_only_hash_sets_logger::log ("contains_sizes",
223					   stat_contains_sizes, buf);
224  buf = insert_only_hash_sets_logger::log ("contains_in_non_trivial_set",
225					   stat_contains_in_non_trivial_set,
226					   buf);
227  buf = insert_only_hash_sets_logger::log ("probes_in_non_trivial_set",
228					   stat_probes_in_non_trivial_set,
229					   buf);
230  buf = insert_only_hash_sets_logger::log ("contains_size0",
231					   stat_contains_size0, buf);
232  buf = insert_only_hash_sets_logger::log ("contains_size1",
233					   stat_contains_size1, buf);
234  buf = insert_only_hash_sets_logger::log ("contains_size2",
235					   stat_contains_size2, buf);
236  buf = insert_only_hash_sets_logger::log ("contains_size3",
237					   stat_contains_size3, buf);
238  buf = insert_only_hash_sets_logger::log ("contains_size4",
239					   stat_contains_size4, buf);
240  buf = insert_only_hash_sets_logger::log ("contains_size5",
241					   stat_contains_size5, buf);
242  buf = insert_only_hash_sets_logger::log ("contains_size6",
243					   stat_contains_size6, buf);
244  buf = insert_only_hash_sets_logger::log ("contains_size7",
245					   stat_contains_size7, buf);
246  buf = insert_only_hash_sets_logger::log ("contains_size8",
247					   stat_contains_size8, buf);
248  buf = insert_only_hash_sets_logger::log ("contains_size9",
249					   stat_contains_size9, buf);
250  buf = insert_only_hash_sets_logger::log ("contains_size10",
251					   stat_contains_size10, buf);
252  buf = insert_only_hash_sets_logger::log ("contains_size11",
253					   stat_contains_size11, buf);
254  buf = insert_only_hash_sets_logger::log ("contains_size12",
255					   stat_contains_size12, buf);
256  buf = insert_only_hash_sets_logger::log ("contains_size13_or_more",
257					   stat_contains_size13_or_more, buf);
258  buf = insert_only_hash_sets_logger::log ("grow_from_size0_to_1",
259					   stat_grow_from_size0_to_1, buf);
260  buf = insert_only_hash_sets_logger::log ("grow_from_size1_to_2",
261					   stat_grow_from_size1_to_2, buf);
262  buf = insert_only_hash_sets_logger::log ("insert_found_hash_collision",
263					   stat_insert_found_hash_collision,
264					   buf);
265  buf = insert_only_hash_sets_logger::log ("double_the_number_of_buckets",
266					   stat_double_the_number_of_buckets,
267					   buf);
268  *buf = '\0';
269}
270
271#else
272
273/* No stats. */
274#define inc_by(statname, amount) do { } while (false && (amount))
275
276#endif
277
278#define inc(statname) inc_by (statname, 1)
279
280template <typename Key, class HashFcn, class Alloc>
281class insert_only_hash_sets
282{
283 public:
284  typedef Key key_type;
285  typedef size_t size_type;
286  typedef Alloc alloc_type;
287  enum { illegal_key = 1 };
288  enum { min_capacity = 4 };
289#if HASHTABLE_STATS
290  enum { stats = true };
291#else
292  enum { stats = false };
293#endif
294
295  /* Do not directly use insert_only_hash_set.  Instead, use the
296     static methods below to create and manipulate objects of the
297     following class.
298
299     Implementation details: each set is represented by a pointer
300     plus, perhaps, out-of-line data, which would be an object of type
301     insert_only_hash_set.  For a pointer, s, the interpretation is: s
302     == NULL means empty set, lsb(s) == 1 means a set with one
303     element, which is (uintptr_t)s - 1, and otherwise s is a pointer
304     of type insert_only_hash_set*.  So, to increase the size of a set
305     we have to change s and/or *s.  To check if a set contains some
306     key we have to examine s and possibly *s.  */
307  class insert_only_hash_set
308  {
309   public:
310    /* Insert a key.  The key must not be a reserved key.  */
311    static inline insert_only_hash_set *insert (key_type key,
312						insert_only_hash_set *s);
313
314
315    /* Create an empty set.  */
316    static inline insert_only_hash_set *create (size_type capacity);
317
318    /* Return whether the given key is present.  If key is illegal_key
319       then either true or false may be returned, but for all other
320       reserved keys false will be returned.  */
321    static bool
322    contains (key_type key, const insert_only_hash_set *s)
323    {
324      if (stats)
325	{
326	  inc (stat_contains);
327	  switch (size (s))
328	    {
329	      case 0: inc (stat_contains_size0); break;
330	      case 1: inc (stat_contains_size1); break;
331	      case 2: inc (stat_contains_size2); break;
332	      case 3: inc (stat_contains_size3); break;
333	      case 4: inc (stat_contains_size4); break;
334	      case 5: inc (stat_contains_size5); break;
335	      case 6: inc (stat_contains_size6); break;
336	      case 7: inc (stat_contains_size7); break;
337	      case 8: inc (stat_contains_size8); break;
338	      case 9: inc (stat_contains_size9); break;
339	      case 10: inc (stat_contains_size10); break;
340	      case 11: inc (stat_contains_size11); break;
341	      case 12: inc (stat_contains_size12); break;
342	      default: inc (stat_contains_size13_or_more); break;
343	    }
344          inc_by (stat_contains_sizes, size (s));
345	}
346
347      return (singleton (s) ?
348              singleton_key (key) == s :
349              ((s != NULL) && s->contains (key)));
350    }
351
352    /* Return a set's size.  */
353    static size_type
354    size (const insert_only_hash_set *s)
355    { return (s == NULL) ? 0 : (singleton (s) ? 1 : s->num_entries); }
356
357    static inline insert_only_hash_set *resize (size_type target_num_buckets,
358						insert_only_hash_set *s);
359
360
361   private:
362    /* Return whether a set has size 1. */
363    static bool
364    singleton (const insert_only_hash_set *s)
365    { return (uintptr_t) s & 1; }
366
367    /* Return the representation of a singleton set containing the
368       given key.  */
369    static insert_only_hash_set *
370    singleton_key (key_type key)
371    { return (insert_only_hash_set *) ((uintptr_t) key + 1); }
372
373    /* Given a singleton set, what key does it contain?  */
374    static key_type
375    extract_singleton_key (const insert_only_hash_set *s)
376    {
377      VTV_DEBUG_ASSERT (singleton (s));
378      return (key_type) ((uintptr_t) s - 1);
379    }
380
381    volatile key_type &
382    key_at_index (size_type index)
383    { return buckets[index]; }
384
385    key_type
386    key_at_index (size_type index) const
387    { return buckets[index]; }
388
389    size_type
390    next_index (size_type index, size_type indices_examined) const
391    { return (index + indices_examined) & (num_buckets - 1); }
392
393    inline void insert_no_resize (key_type key);
394
395    inline bool contains (key_type key) const;
396
397    inline insert_only_hash_set *resize_if_necessary (void);
398
399    size_type num_buckets;  /* Must be a power of 2 not less than
400			       min_capacity.  */
401    volatile size_type num_entries;
402    volatile key_type buckets[0];  /* Actual array size is num_buckets.  */
403  };
404
405  /* Create an empty set with the given capacity.  Requires that n be
406     0 or a power of 2.  If 1 < n < min_capacity then treat n as
407     min_capacity.  Sets *handle.  Returns true unless the allocator
408     fails.  Subsequent operations on this set should use the same
409     handle. */
410
411  static inline bool create (size_type n, insert_only_hash_set **handle);
412
413  /* Force the capacity of a set to be n, unless it was more than n
414     already.  Requires that n be 0 or a power of 2.  Sets *handle
415     unless the current capacity is n or more.  Returns true unless
416     the allocator fails.  */
417
418  static inline bool resize (size_type n, insert_only_hash_set **handle);
419
420  /* Insert a key.  *handle is unmodified unless (1) a resize occurs,
421     or (2) the set was initially empty. Returns true unless the
422     allocator fails during a resize.  If the allocator fails during a
423     resize then the set is reset to be the empty set.  The key must
424     not be a reserved key.  */
425
426  static inline bool insert (key_type key, insert_only_hash_set **handle);
427
428  /* Check for the presence of a key.  If key is illegal_key then
429     either true or false may be returned, but for all other reserved
430     keys false will be returned.  */
431
432  static inline bool
433  contains (key_type key, /* const */ insert_only_hash_set **handle)
434  { return insert_only_hash_set::contains (key, *handle); }
435
436  /* Return the size of the given set.  */
437  static size_type
438  size (const insert_only_hash_set **handle)
439  { return insert_only_hash_set::size (*handle); }
440
441  static bool
442  is_reserved_key (key_type key)
443  { return ((uintptr_t) key % 2) == 1; }
444};
445
446template <typename Key, class HashFcn, class Alloc>
447typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set *
448insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set::resize
449                                         (size_type n, insert_only_hash_set *s)
450{
451  if (s == NULL)
452    return create (n);
453
454  size_type capacity = singleton (s) ? 1 : s->num_buckets;
455
456  if (n <= capacity)
457    return s;
458
459  insert_only_hash_set *result =
460                                create (std::max<size_type> (n, min_capacity));
461  if (result != NULL)
462    {
463      if (singleton (s))
464        {
465          result->insert_no_resize (extract_singleton_key (s));
466        }
467      else
468        {
469          for (size_type i = 0; i < s->num_buckets; i++)
470            if (s->buckets[i] != (key_type) illegal_key)
471              result->insert_no_resize (s->buckets[i]);
472        }
473      VTV_DEBUG_ASSERT (size (result) == size (s));
474    }
475  return result;
476}
477
478template <typename Key, class HashFcn, class Alloc>
479typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set *
480insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set::insert
481                                        (key_type key, insert_only_hash_set *s)
482{
483  VTV_DEBUG_ASSERT (!is_reserved_key (key));
484
485  inc_by (stat_grow_from_size0_to_1, s == NULL);
486
487  if (s == NULL)
488    return singleton_key (key);
489
490  if (singleton (s))
491    {
492      const key_type old_key = extract_singleton_key (s);
493      if (old_key == key)
494	return s;
495
496      /* Grow from size 1 to size 2.  */
497      inc (stat_grow_from_size1_to_2);
498      s = create (2);
499      if (s == NULL)
500	return NULL;
501
502      s->insert_no_resize (old_key);
503      s->insert_no_resize (key);
504      VTV_DEBUG_ASSERT (size (s) == 2);
505      return s;
506    }
507  s = s->resize_if_necessary();
508  if (s != NULL)
509    s->insert_no_resize (key);
510  return s;
511}
512
513template <typename Key, class HashFcn, class Alloc>
514typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set *
515insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set::create
516                                                           (size_type capacity)
517{
518  if (capacity <= 1)
519    return NULL;
520
521  VTV_DEBUG_ASSERT (capacity > 1 && (capacity & (capacity - 1)) == 0);
522  VTV_DEBUG_ASSERT (sizeof (insert_only_hash_set) == 2 * sizeof (size_type));
523  capacity = std::max <size_type> (capacity, min_capacity);
524  const size_t num_bytes = sizeof (insert_only_hash_set) +
525                                                  sizeof (key_type) * capacity;
526  alloc_type alloc;
527  insert_only_hash_set *result = (insert_only_hash_set *) alloc (num_bytes);
528  result->num_buckets = capacity;
529  result->num_entries = 0;
530  for (size_type i = 0; i < capacity; i++)
531    result->buckets[i] = (key_type) illegal_key;
532  return result;
533}
534
535template <typename Key, class HashFcn, class Alloc>
536void
537insert_only_hash_sets<Key, HashFcn,
538                                 Alloc>::insert_only_hash_set::insert_no_resize
539                                                                 (key_type key)
540{
541  HashFcn hasher;
542  const size_type capacity = num_buckets;
543  VTV_DEBUG_ASSERT (capacity >= min_capacity);
544  VTV_DEBUG_ASSERT (!is_reserved_key (key));
545  size_type index = hasher (key) & (capacity - 1);
546  key_type k = key_at_index (index);
547  size_type indices_examined = 0;
548  while (k != key)
549    {
550      ++indices_examined;
551      if (k == (key_type) illegal_key)
552        {
553          key_at_index (index) = key;
554          ++num_entries;
555          return;
556        }
557      else
558	{
559	  inc_by (stat_insert_found_hash_collision,
560		  hasher (k) == hasher (key));
561	}
562      VTV_DEBUG_ASSERT (indices_examined < capacity);
563      index = next_index (index, indices_examined);
564      k = key_at_index (index);
565    }
566}
567
568template<typename Key, class HashFcn, class Alloc>
569bool
570insert_only_hash_sets<Key, HashFcn, Alloc>::insert_only_hash_set::contains
571                                                           (key_type key) const
572{
573  inc (stat_contains_in_non_trivial_set);
574  HashFcn hasher;
575  const size_type capacity = num_buckets;
576  size_type index = hasher (key) & (capacity - 1);
577  key_type k = key_at_index (index);
578  size_type indices_examined = 0;
579  inc (stat_probes_in_non_trivial_set);
580  while (k != key)
581    {
582      ++indices_examined;
583      if (/*UNLIKELY*/(k == (key_type) illegal_key
584		       || indices_examined == capacity))
585	return false;
586
587      index = next_index (index, indices_examined);
588      k = key_at_index (index);
589      inc (stat_probes_in_non_trivial_set);
590    }
591  return true;
592}
593
594template <typename Key, class HashFcn, class Alloc>
595typename insert_only_hash_sets <Key, HashFcn, Alloc>::insert_only_hash_set *
596   insert_only_hash_sets<Key, HashFcn,
597                       Alloc>::insert_only_hash_set::resize_if_necessary (void)
598{
599  VTV_DEBUG_ASSERT (num_buckets >= min_capacity);
600  size_type unused = num_buckets - num_entries;
601  if (unused < (num_buckets >> 2))
602    {
603      inc (stat_double_the_number_of_buckets);
604      size_type new_num_buckets = num_buckets * 2;
605      insert_only_hash_set *s = create (new_num_buckets);
606      for (size_type i = 0; i < num_buckets; i++)
607        if (buckets[i] != (key_type) illegal_key)
608          s->insert_no_resize (buckets[i]);
609      VTV_DEBUG_ASSERT (size (this) == size (s));
610      return s;
611    }
612  else
613    return this;
614}
615
616template<typename Key, class HashFcn, class Alloc>
617bool
618insert_only_hash_sets<Key, HashFcn, Alloc>::create (size_type n,
619						 insert_only_hash_set **handle)
620
621{
622  inc (stat_create);
623  *handle = insert_only_hash_set::create (n);
624  return (n <= 1) || (*handle != NULL);
625}
626
627template<typename Key, class HashFcn, class Alloc>
628bool
629insert_only_hash_sets<Key, HashFcn, Alloc>::resize (size_type n,
630					         insert_only_hash_set **handle)
631{
632  inc (stat_resize);
633  *handle = insert_only_hash_set::resize (n, *handle);
634  return (n <= 1) || (*handle != NULL);
635}
636
637template<typename Key, class HashFcn, class Alloc>
638bool
639insert_only_hash_sets<Key, HashFcn, Alloc>::insert (key_type key,
640                                                 insert_only_hash_set **handle)
641{
642  inc (stat_insert);
643  const size_type old_size = insert_only_hash_set::size (*handle);
644  *handle = insert_only_hash_set::insert (key, *handle);
645  if (*handle != NULL)
646    {
647      const size_type delta = insert_only_hash_set::size (*handle) - old_size;
648      inc_by (stat_insert_key_that_was_already_present, delta == 0);
649    }
650  return *handle != NULL;
651}
652
653#endif /* VTV_SET_H  */
654